Home | History | Annotate | Download | only in swapfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/param.h>
     28 #include <sys/systm.h>
     29 #include <sys/errno.h>
     30 #include <sys/kmem.h>
     31 #include <sys/vnode.h>
     32 #include <sys/vfs_opreg.h>
     33 #include <sys/swap.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/buf.h>
     36 #include <sys/callb.h>
     37 #include <sys/debug.h>
     38 #include <vm/seg.h>
     39 #include <sys/fs/swapnode.h>
     40 #include <fs/fs_subr.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/mem_config.h>
     43 #include <sys/atomic.h>
     44 
     45 extern const fs_operation_def_t swap_vnodeops_template[];
     46 
     47 /*
     48  * swapfs_minfree is the amount of physical memory (actually remaining
     49  * availrmem) that we want to keep free for the rest of the system.  This
     50  * means that swapfs can only grow to availrmem - swapfs_minfree.  This
 * can be set either as a constant value or as a percentage of installed
 * physical memory. It is set in swapinit().
     53  *
     54  * Users who want to change the amount of memory that can be used as swap
     55  * space should do so by setting swapfs_desfree at boot time,
     56  * not swapfs_minfree.
     57  */
     58 
/*
 * Tunables, in pages.  A value of 0 tells swapfs_recalc() to compute a
 * default for that tunable.
 */
pgcnt_t swapfs_desfree = 0;
pgcnt_t swapfs_minfree = 0;
pgcnt_t swapfs_reserve = 0;

#ifdef SWAPFS_DEBUG
int swapfs_debug;
#endif /* SWAPFS_DEBUG */


/* Number of vnodes created so far by swapfs_getvp(). */
static int swapfs_vpcount;
/* Held while creating swap vnodes and while editing the request lists. */
static kmutex_t swapfs_lock;
/*
 * sw_ar is the array of request structures allocated in swapinit();
 * sw_pendlist and sw_freelist are the heads of the pending and free
 * lists, linked through a_next.
 */
static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist;

static struct vnode **swap_vnodes;	/* ptr's to swap vnodes */

static void swap_init_mem_config(void);

/*
 * Values of the tunables as they were when swapinit() ran; saved by
 * swapfs_recalc_save_initial() and used as the starting point of every
 * swapfs_recalc().
 */
static pgcnt_t initial_swapfs_desfree;
static pgcnt_t initial_swapfs_minfree;
static pgcnt_t initial_swapfs_reserve;

static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr);
     81 
     82 static void
     83 swapfs_recalc_save_initial(void)
     84 {
     85 	initial_swapfs_desfree = swapfs_desfree;
     86 	initial_swapfs_minfree = swapfs_minfree;
     87 	initial_swapfs_reserve = swapfs_reserve;
     88 }
     89 
     90 static int
     91 swapfs_recalc(pgcnt_t pgs)
     92 {
     93 	pgcnt_t new_swapfs_desfree;
     94 	pgcnt_t new_swapfs_minfree;
     95 	pgcnt_t new_swapfs_reserve;
     96 
     97 	new_swapfs_desfree = initial_swapfs_desfree;
     98 	new_swapfs_minfree = initial_swapfs_minfree;
     99 	new_swapfs_reserve = initial_swapfs_reserve;
    100 
    101 	if (new_swapfs_desfree == 0)
    102 		new_swapfs_desfree = btopr(7 * 512 * 1024); /* 3-1/2Mb */;
    103 
    104 	if (new_swapfs_minfree == 0) {
    105 		/*
    106 		 * We set this lower than we'd like here, 2Mb, because we
    107 		 * always boot on swapfs. It's up to a safer value,
    108 		 * swapfs_desfree, when/if we add physical swap devices
    109 		 * in swapadd(). Users who want to change the amount of
    110 		 * memory that can be used as swap space should do so by
    111 		 * setting swapfs_desfree at boot time, not swapfs_minfree.
    112 		 * However, swapfs_minfree is tunable by install as a
    113 		 * workaround for bugid 1147463.
    114 		 */
    115 		new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
    116 	}
    117 
    118 	/*
    119 	 * priv processes can reserve memory as swap as long as availrmem
    120 	 * remains greater than swapfs_minfree; in the case of non-priv
    121 	 * processes, memory can be reserved as swap only if availrmem
    122 	 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
    123 	 * swapfs_reserve amount of memswap is not available to non-priv
    124 	 * processes. This protects daemons such as automounter dying
    125 	 * as a result of application processes eating away almost entire
    126 	 * membased swap. This safeguard becomes useless if apps are run
    127 	 * with root access.
    128 	 *
    129 	 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever
    130 	 * is greater up to the limit of 128 MB.
    131 	 */
    132 	if (new_swapfs_reserve == 0)
    133 		new_swapfs_reserve = MIN(btopr(128 * 1024 * 1024),
    134 		    MAX(btopr(4 * 1024 * 1024), pgs >> 7));
    135 
    136 	/* Test basic numeric viability. */
    137 	if (new_swapfs_minfree > pgs)
    138 		return (0);
    139 
    140 	/* Equivalent test to anon_resvmem() check. */
    141 	if (availrmem < new_swapfs_minfree) {
    142 		/*
    143 		 * If ism pages are being used, then there must be agreement
    144 		 * between these two policies.
    145 		 */
    146 		if ((availrmem > segspt_minfree) && (segspt_minfree > 0)) {
    147 			new_swapfs_minfree = segspt_minfree;
    148 		} else {
    149 			return (0);
    150 		}
    151 	}
    152 
    153 	swapfs_desfree = new_swapfs_desfree;
    154 	swapfs_minfree = new_swapfs_minfree;
    155 	swapfs_reserve = new_swapfs_reserve;
    156 
    157 	return (1);
    158 }
    159 
    160 /*ARGSUSED1*/
    161 int
    162 swapinit(int fstype, char *name)
    163 {							/* reserve for mp */
    164 	ssize_t sw_freelist_size = klustsize / PAGESIZE * 2;
    165 	int i, error;
    166 
    167 	static const fs_operation_def_t swap_vfsops[] = {
    168 		VFSNAME_SYNC, { .vfs_sync = swap_sync },
    169 		NULL, NULL
    170 	};
    171 
    172 	SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0);
    173 	mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL);
    174 
    175 	swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *),
    176 	    KM_SLEEP);
    177 
    178 	swapfs_recalc_save_initial();
    179 	if (!swapfs_recalc(physmem))
    180 		cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)",
    181 		    swapfs_minfree, physmem);
    182 
    183 	/*
    184 	 * Arrange for a callback on memory size change.
    185 	 */
    186 	swap_init_mem_config();
    187 
    188 	sw_ar = (struct async_reqs *)
    189 	    kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP);
    190 
    191 	error = vfs_setfsops(fstype, swap_vfsops, NULL);
    192 	if (error != 0) {
    193 		cmn_err(CE_WARN, "swapinit: bad vfs ops template");
    194 		return (error);
    195 	}
    196 
    197 	error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops);
    198 	if (error != 0) {
    199 		(void) vfs_freevfsops_by_type(fstype);
    200 		cmn_err(CE_WARN, "swapinit: bad vnode ops template");
    201 		return (error);
    202 	}
    203 	sw_freelist = sw_ar;
    204 	for (i = 0; i < sw_freelist_size - 1; i++)
    205 		sw_ar[i].a_next = &sw_ar[i + 1];
    206 
    207 	return (0);
    208 }
    209 
    210 /*
    211  * Get a swapfs vnode corresponding to the specified identifier.
    212  */
    213 struct vnode *
    214 swapfs_getvp(ulong_t vidx)
    215 {
    216 	struct vnode *vp;
    217 
    218 	vp = swap_vnodes[vidx];
    219 	if (vp) {
    220 		return (vp);
    221 	}
    222 
    223 	mutex_enter(&swapfs_lock);
    224 	vp = swap_vnodes[vidx];
    225 	if (vp == NULL) {
    226 		vp = vn_alloc(KM_SLEEP);
    227 		vn_setops(vp, swap_vnodeops);
    228 		vp->v_type = VREG;
    229 		vp->v_flag |= (VISSWAP|VISSWAPFS);
    230 		swap_vnodes[vidx] = vp;
    231 		swapfs_vpcount++;
    232 	}
    233 	mutex_exit(&swapfs_lock);
    234 	return (vp);
    235 }
    236 
    237 int swap_lo;
    238 
    239 /*ARGSUSED*/
    240 static int
    241 swap_sync(struct vfs *vfsp, short flag, struct cred *cr)
    242 {
    243 	struct vnode *vp;
    244 	int i;
    245 
    246 	if (!(flag & SYNC_ALL))
    247 		return (1);
    248 
    249 	/*
    250 	 * assumes that we are the only one left to access this so that
    251 	 * no need to use swapfs_lock (since it's staticly defined)
    252 	 */
    253 	for (i = 0; i < MAX_SWAP_VNODES; i++) {
    254 		vp = swap_vnodes[i];
    255 		if (vp) {
    256 			VN_HOLD(vp);
    257 			(void) VOP_PUTPAGE(vp, (offset_t)0, 0,
    258 			    (B_ASYNC | B_FREE), kcred, NULL);
    259 			VN_RELE(vp);
    260 		}
    261 	}
    262 	return (0);
    263 }
    264 
    265 extern int sw_pending_size;
    266 
    267 /*
    268  * Take an async request off the pending queue
    269  */
    270 struct async_reqs *
    271 sw_getreq()
    272 {
    273 	struct async_reqs *arg;
    274 
    275 	mutex_enter(&swapfs_lock);
    276 	arg = sw_pendlist;
    277 	if (arg) {
    278 		sw_pendlist = arg->a_next;
    279 		arg->a_next = NULL;
    280 		sw_pending_size -= PAGESIZE;
    281 	}
    282 	ASSERT(sw_pending_size >= 0);
    283 	mutex_exit(&swapfs_lock);
    284 	return (arg);
    285 }
    286 
    287 /*
    288  * Put an async request on the pending queue
    289  */
    290 void
    291 sw_putreq(struct async_reqs *arg)
    292 {
    293 	/* Hold onto it */
    294 	VN_HOLD(arg->a_vp);
    295 
    296 	mutex_enter(&swapfs_lock);
    297 	arg->a_next = sw_pendlist;
    298 	sw_pendlist = arg;
    299 	sw_pending_size += PAGESIZE;
    300 	mutex_exit(&swapfs_lock);
    301 }
    302 
    303 /*
    304  * Put an async request back on the pending queue
    305  */
    306 void
    307 sw_putbackreq(struct async_reqs *arg)
    308 {
    309 	mutex_enter(&swapfs_lock);
    310 	arg->a_next = sw_pendlist;
    311 	sw_pendlist = arg;
    312 	sw_pending_size += PAGESIZE;
    313 	mutex_exit(&swapfs_lock);
    314 }
    315 
    316 /*
    317  * Take an async request structure off the free list
    318  */
    319 struct async_reqs *
    320 sw_getfree()
    321 {
    322 	struct async_reqs *arg;
    323 
    324 	mutex_enter(&swapfs_lock);
    325 	arg = sw_freelist;
    326 	if (arg) {
    327 		sw_freelist = arg->a_next;
    328 		arg->a_next = NULL;
    329 	}
    330 	mutex_exit(&swapfs_lock);
    331 	return (arg);
    332 }
    333 
    334 /*
    335  * Put an async request structure on the free list
    336  */
    337 void
    338 sw_putfree(struct async_reqs *arg)
    339 {
    340 	/* Release our hold - should have locked the page by now */
    341 	VN_RELE(arg->a_vp);
    342 
    343 	mutex_enter(&swapfs_lock);
    344 	arg->a_next = sw_freelist;
    345 	sw_freelist = arg;
    346 	mutex_exit(&swapfs_lock);
    347 }
    348 
    349 static pgcnt_t swapfs_pending_delete;
    350 
    351 /*ARGSUSED*/
    352 static void
    353 swap_mem_config_post_add(
    354 	void *arg,
    355 	pgcnt_t delta_swaps)
    356 {
    357 	(void) swapfs_recalc(physmem - swapfs_pending_delete);
    358 }
    359 
    360 /*ARGSUSED*/
    361 static int
    362 swap_mem_config_pre_del(
    363 	void *arg,
    364 	pgcnt_t delta_swaps)
    365 {
    366 	pgcnt_t nv;
    367 
    368 	nv = atomic_add_long_nv(&swapfs_pending_delete, (spgcnt_t)delta_swaps);
    369 	if (!swapfs_recalc(physmem - nv)) {
    370 		/*
    371 		 * Tidy-up is done by the call to post_del which
    372 		 * is always made.
    373 		 */
    374 		cmn_err(CE_NOTE, "Memory operation refused to ensure system "
    375 		    "doesn't deadlock due to excessive consumption by swapfs.");
    376 		return (EBUSY);
    377 	}
    378 	return (0);
    379 }
    380 
    381 /*ARGSUSED*/
    382 static void
    383 swap_mem_config_post_del(
    384 	void *arg,
    385 	pgcnt_t delta_swaps,
    386 	int cancelled)
    387 {
    388 	pgcnt_t nv;
    389 
    390 	nv = atomic_add_long_nv(&swapfs_pending_delete, -(spgcnt_t)delta_swaps);
    391 	(void) swapfs_recalc(physmem - nv);
    392 }
    393 
/*
 * Callback vector handed to kphysm_setup_func_register() by
 * swap_init_mem_config(): a version tag followed by the post-add,
 * pre-delete and post-delete handlers defined in this file.
 */
static kphysm_setup_vector_t swap_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	swap_mem_config_post_add,
	swap_mem_config_pre_del,
	swap_mem_config_post_del,
};
    400 
    401 static void
    402 swap_init_mem_config(void)
    403 {
    404 	int ret;
    405 
    406 	ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL);
    407 	ASSERT(ret == 0);
    408 }
    409