Home | History | Annotate | Download | only in swapfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/param.h>
     28 #include <sys/systm.h>
     29 #include <sys/buf.h>
     30 #include <sys/cred.h>
     31 #include <sys/errno.h>
     32 #include <sys/vnode.h>
     33 #include <sys/vfs_opreg.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/swap.h>
     36 #include <sys/mman.h>
     37 #include <sys/vmsystm.h>
     38 #include <sys/vtrace.h>
     39 #include <sys/debug.h>
     40 #include <sys/sysmacros.h>
     41 #include <sys/vm.h>
     42 
     43 #include <sys/fs/swapnode.h>
     44 
     45 #include <vm/seg.h>
     46 #include <vm/page.h>
     47 #include <vm/pvn.h>
     48 #include <fs/fs_subr.h>
     49 
     50 #include <vm/seg_kp.h>
     51 
     52 /*
     53  * Define the routines within this file.
     54  */
     55 static int	swap_getpage(struct vnode *vp, offset_t off, size_t len,
     56     uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
     57     caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
     58 static int	swap_putpage(struct vnode *vp, offset_t off, size_t len,
     59     int flags, struct cred *cr, caller_context_t *ct);
     60 static void	swap_inactive(struct vnode *vp, struct cred *cr,
     61     caller_context_t *ct);
     62 static void	swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
     63     cred_t *cr, caller_context_t *ct);
     64 
     65 static int	swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
     66     uint_t *protp, page_t **plarr, size_t plsz,
     67     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
     68 
     69 int	swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
     70     uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
     71     uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
     72     enum seg_rw rw, struct cred *cr);
     73 
     74 static int 	swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
     75     size_t *lenp, int flags, struct cred *cr);
     76 
     77 const fs_operation_def_t swap_vnodeops_template[] = {
     78 	VOPNAME_INACTIVE,	{ .vop_inactive = swap_inactive },
     79 	VOPNAME_GETPAGE,	{ .vop_getpage = swap_getpage },
     80 	VOPNAME_PUTPAGE,	{ .vop_putpage = swap_putpage },
     81 	VOPNAME_DISPOSE,	{ .vop_dispose = swap_dispose },
     82 	VOPNAME_SETFL,		{ .error = fs_error },
     83 	VOPNAME_POLL,		{ .error = fs_error },
     84 	VOPNAME_PATHCONF,	{ .error = fs_error },
     85 	VOPNAME_GETSECATTR,	{ .error = fs_error },
     86 	VOPNAME_SHRLOCK,	{ .error = fs_error },
     87 	NULL,			NULL
     88 };
     89 
     90 vnodeops_t *swap_vnodeops;
     91 
     92 /* ARGSUSED */
     93 static void
     94 swap_inactive(
     95 	struct vnode *vp,
     96 	struct cred *cr,
     97 	caller_context_t *ct)
     98 {
     99 	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
    100 }
    101 
    102 /*
    103  * Return all the pages from [off..off+len] in given file
    104  */
    105 /*ARGSUSED*/
    106 static int
    107 swap_getpage(
    108 	struct vnode *vp,
    109 	offset_t off,
    110 	size_t len,
    111 	uint_t *protp,
    112 	page_t *pl[],
    113 	size_t plsz,
    114 	struct seg *seg,
    115 	caddr_t addr,
    116 	enum seg_rw rw,
    117 	struct cred *cr,
    118 	caller_context_t *ct)
    119 {
    120 	int err;
    121 
    122 	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
    123 	    (void *)vp, off, len, 0, 0);
    124 
    125 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
    126 	    "swapfs getpage:vp %p off %llx len %ld",
    127 	    (void *)vp, off, len);
    128 
    129 	if (len <= PAGESIZE) {
    130 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
    131 		    seg, addr, rw, cr);
    132 	} else {
    133 		err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
    134 		    protp, pl, plsz, seg, addr, rw, cr);
    135 	}
    136 
    137 	return (err);
    138 }
    139 
    140 /*
    141  * Called from pvn_getpages or swap_getpage to get a particular page.
    142  */
    143 /*ARGSUSED*/
    144 static int
    145 swap_getapage(
    146 	struct vnode *vp,
    147 	u_offset_t off,
    148 	size_t len,
    149 	uint_t *protp,
    150 	page_t *pl[],
    151 	size_t plsz,
    152 	struct seg *seg,
    153 	caddr_t addr,
    154 	enum seg_rw rw,
    155 	struct cred *cr)
    156 {
    157 	struct page *pp, *rpp;
    158 	int flags;
    159 	int err = 0;
    160 	struct vnode *pvp = NULL;
    161 	u_offset_t poff;
    162 	int flag_noreloc;
    163 	se_t lock;
    164 	extern int kcage_on;
    165 	int upgrade = 0;
    166 
    167 	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
    168 	    vp, off, len, 0, 0);
    169 
    170 	/*
    171 	 * Until there is a call-back mechanism to cause SEGKP
    172 	 * pages to be unlocked, make them non-relocatable.
    173 	 */
    174 	if (SEG_IS_SEGKP(seg))
    175 		flag_noreloc = PG_NORELOC;
    176 	else
    177 		flag_noreloc = 0;
    178 
    179 	if (protp != NULL)
    180 		*protp = PROT_ALL;
    181 
    182 	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
    183 
    184 again:
    185 	if (pp = page_lookup(vp, off, lock)) {
    186 		/*
    187 		 * In very rare instances, a segkp page may have been
    188 		 * relocated outside of the kernel by the kernel cage
    189 		 * due to the window between page_unlock() and
    190 		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
    191 		 * rareness of these occurances, the solution is to
    192 		 * relocate the page to a P_NORELOC page.
    193 		 */
    194 		if (flag_noreloc != 0) {
    195 			if (!PP_ISNORELOC(pp) && kcage_on) {
    196 				if (lock != SE_EXCL) {
    197 					upgrade = 1;
    198 					if (!page_tryupgrade(pp)) {
    199 						page_unlock(pp);
    200 						lock = SE_EXCL;
    201 						goto again;
    202 					}
    203 				}
    204 
    205 				if (page_relocate_cage(&pp, &rpp) != 0)
    206 					panic("swap_getapage: "
    207 					    "page_relocate_cage failed");
    208 
    209 				pp = rpp;
    210 			}
    211 		}
    212 
    213 		if (pl) {
    214 			if (upgrade)
    215 				page_downgrade(pp);
    216 
    217 			pl[0] = pp;
    218 			pl[1] = NULL;
    219 		} else {
    220 			page_unlock(pp);
    221 		}
    222 	} else {
    223 		pp = page_create_va(vp, off, PAGESIZE,
    224 		    PG_WAIT | PG_EXCL | flag_noreloc,
    225 		    seg, addr);
    226 		/*
    227 		 * Someone raced in and created the page after we did the
    228 		 * lookup but before we did the create, so go back and
    229 		 * try to look it up again.
    230 		 */
    231 		if (pp == NULL)
    232 			goto again;
    233 		if (rw != S_CREATE) {
    234 			err = swap_getphysname(vp, off, &pvp, &poff);
    235 			if (pvp) {
    236 				struct anon *ap;
    237 				kmutex_t *ahm;
    238 
    239 				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
    240 				err = VOP_PAGEIO(pvp, pp, poff,
    241 				    PAGESIZE, flags, cr, NULL);
    242 
    243 				if (!err) {
    244 					ahm = &anonhash_lock[AH_LOCK(vp, off)];
    245 					mutex_enter(ahm);
    246 
    247 					ap = swap_anon(vp, off);
    248 					if (ap == NULL) {
    249 						panic("swap_getapage:"
    250 						    " null anon");
    251 					}
    252 
    253 					if (ap->an_pvp == pvp &&
    254 					    ap->an_poff == poff) {
    255 						swap_phys_free(pvp, poff,
    256 						    PAGESIZE);
    257 						ap->an_pvp = NULL;
    258 						ap->an_poff = NULL;
    259 						hat_setmod(pp);
    260 					}
    261 
    262 					mutex_exit(ahm);
    263 				}
    264 			} else {
    265 				if (!err)
    266 					pagezero(pp, 0, PAGESIZE);
    267 
    268 				/*
    269 				 * If it's a fault ahead, release page_io_lock
    270 				 * and SE_EXCL we grabbed in page_create_va
    271 				 *
    272 				 * If we are here, we haven't called VOP_PAGEIO
    273 				 * and thus calling pvn_read_done(pp, B_READ)
    274 				 * below may mislead that we tried i/o. Besides,
    275 				 * in case of async, pvn_read_done() should
    276 				 * not be called by *getpage()
    277 				 */
    278 				if (pl == NULL) {
    279 					/*
    280 					 * swap_getphysname can return error
    281 					 * only when we are getting called from
    282 					 * swapslot_free which passes non-NULL
    283 					 * pl to VOP_GETPAGE.
    284 					 */
    285 					ASSERT(err == 0);
    286 					page_io_unlock(pp);
    287 					page_unlock(pp);
    288 				}
    289 			}
    290 		}
    291 
    292 		ASSERT(pp != NULL);
    293 
    294 		if (err && pl)
    295 			pvn_read_done(pp, B_ERROR);
    296 
    297 		if (!err && pl)
    298 			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
    299 	}
    300 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
    301 	    "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
    302 	return (err);
    303 }
    304 
    305 /*
    306  * Called from large page anon routines only! This is an ugly hack where
    307  * the anon layer directly calls into swapfs with a preallocated large page.
    308  * Another method would have been to change to VOP and add an extra arg for
    309  * the preallocated large page. This all could be cleaned up later when we
    310  * solve the anonymous naming problem and no longer need to loop across of
    311  * the VOP in PAGESIZE increments to fill in or initialize a large page as
    312  * is done today. I think the latter is better since it avoid a change to
    313  * the VOP interface that could later be avoided.
    314  */
    315 int
    316 swap_getconpage(
    317 	struct vnode *vp,
    318 	u_offset_t off,
    319 	size_t len,
    320 	uint_t *protp,
    321 	page_t *pl[],
    322 	size_t plsz,
    323 	page_t	*conpp,
    324 	uint_t	*pszc,
    325 	spgcnt_t *nreloc,
    326 	struct seg *seg,
    327 	caddr_t addr,
    328 	enum seg_rw rw,
    329 	struct cred *cr)
    330 {
    331 	struct page	*pp;
    332 	int 		err = 0;
    333 	struct vnode	*pvp = NULL;
    334 	u_offset_t	poff;
    335 
    336 	ASSERT(len == PAGESIZE);
    337 	ASSERT(pl != NULL);
    338 	ASSERT(plsz == PAGESIZE);
    339 	ASSERT(protp == NULL);
    340 	ASSERT(nreloc != NULL);
    341 	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
    342 	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
    343 	    vp, off, len, 0, 0);
    344 
    345 	/*
    346 	 * If we are not using a preallocated page then we know one already
    347 	 * exists. So just let the old code handle it.
    348 	 */
    349 	if (conpp == NULL) {
    350 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
    351 		    seg, addr, rw, cr);
    352 		return (err);
    353 	}
    354 	ASSERT(conpp->p_szc != 0);
    355 	ASSERT(PAGE_EXCL(conpp));
    356 
    357 
    358 	ASSERT(conpp->p_next == conpp);
    359 	ASSERT(conpp->p_prev == conpp);
    360 	ASSERT(!PP_ISAGED(conpp));
    361 	ASSERT(!PP_ISFREE(conpp));
    362 
    363 	*nreloc = 0;
    364 	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
    365 
    366 	/*
    367 	 * If existing page is found we may need to relocate.
    368 	 */
    369 	if (pp != conpp) {
    370 		ASSERT(rw != S_CREATE);
    371 		ASSERT(pszc != NULL);
    372 		ASSERT(PAGE_SHARED(pp));
    373 		if (pp->p_szc < conpp->p_szc) {
    374 			*pszc = pp->p_szc;
    375 			page_unlock(pp);
    376 			err = -1;
    377 		} else if (pp->p_szc > conpp->p_szc &&
    378 		    seg->s_szc > conpp->p_szc) {
    379 			*pszc = MIN(pp->p_szc, seg->s_szc);
    380 			page_unlock(pp);
    381 			err = -2;
    382 		} else {
    383 			pl[0] = pp;
    384 			pl[1] = NULL;
    385 			if (page_pptonum(pp) &
    386 			    (page_get_pagecnt(conpp->p_szc) - 1))
    387 				cmn_err(CE_PANIC, "swap_getconpage: no root");
    388 		}
    389 		return (err);
    390 	}
    391 
    392 	ASSERT(PAGE_EXCL(pp));
    393 
    394 	if (*nreloc != 0) {
    395 		ASSERT(rw != S_CREATE);
    396 		pl[0] = pp;
    397 		pl[1] = NULL;
    398 		return (0);
    399 	}
    400 
    401 	*nreloc = 1;
    402 
    403 	/*
    404 	 * If necessary do the page io.
    405 	 */
    406 	if (rw != S_CREATE) {
    407 		/*
    408 		 * Since we are only called now on behalf of an
    409 		 * address space operation it's impossible for
    410 		 * us to fail unlike swap_getapge() which
    411 		 * also gets called from swapslot_free().
    412 		 */
    413 		if (swap_getphysname(vp, off, &pvp, &poff)) {
    414 			cmn_err(CE_PANIC,
    415 			    "swap_getconpage: swap_getphysname failed!");
    416 		}
    417 
    418 		if (pvp != NULL) {
    419 			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
    420 			    cr, NULL);
    421 			if (err == 0) {
    422 				struct anon *ap;
    423 				kmutex_t *ahm;
    424 
    425 				ahm = &anonhash_lock[AH_LOCK(vp, off)];
    426 				mutex_enter(ahm);
    427 				ap = swap_anon(vp, off);
    428 				if (ap == NULL)
    429 					panic("swap_getconpage: null anon");
    430 				if (ap->an_pvp != pvp || ap->an_poff != poff)
    431 					panic("swap_getconpage: bad anon");
    432 
    433 				swap_phys_free(pvp, poff, PAGESIZE);
    434 				ap->an_pvp = NULL;
    435 				ap->an_poff = NULL;
    436 				hat_setmod(pp);
    437 				mutex_exit(ahm);
    438 			}
    439 		} else {
    440 			pagezero(pp, 0, PAGESIZE);
    441 		}
    442 	}
    443 
    444 	/*
    445 	 * Normally we would let pvn_read_done() destroy
    446 	 * the page on IO error. But since this is a preallocated
    447 	 * page we'll let the anon layer handle it.
    448 	 */
    449 	page_io_unlock(pp);
    450 	if (err != 0)
    451 		page_hashout(pp, NULL);
    452 	ASSERT(pp->p_next == pp);
    453 	ASSERT(pp->p_prev == pp);
    454 
    455 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
    456 	    "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
    457 
    458 	pl[0] = pp;
    459 	pl[1] = NULL;
    460 	return (err);
    461 }
    462 
    463 /* Async putpage klustering stuff */
    464 int sw_pending_size;
    465 extern int klustsize;
    466 extern struct async_reqs *sw_getreq();
    467 extern void sw_putreq(struct async_reqs *);
    468 extern void sw_putbackreq(struct async_reqs *);
    469 extern struct async_reqs *sw_getfree();
    470 extern void sw_putfree(struct async_reqs *);
    471 
    472 static size_t swap_putpagecnt, swap_pagespushed;
    473 static size_t swap_otherfail, swap_otherpages;
    474 static size_t swap_klustfail, swap_klustpages;
    475 static size_t swap_getiofail, swap_getiopages;
    476 
    477 /*
    478  * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
    479  * If len == 0, do from off to EOF.
    480  */
    481 static int swap_nopage = 0;	/* Don't do swap_putpage's if set */
    482 
    483 /* ARGSUSED */
    484 static int
    485 swap_putpage(
    486 	struct vnode *vp,
    487 	offset_t off,
    488 	size_t len,
    489 	int flags,
    490 	struct cred *cr,
    491 	caller_context_t *ct)
    492 {
    493 	page_t *pp;
    494 	u_offset_t io_off;
    495 	size_t io_len = 0;
    496 	int err = 0;
    497 	int nowait;
    498 	struct async_reqs *arg;
    499 
    500 	if (swap_nopage)
    501 		return (0);
    502 
    503 	ASSERT(vp->v_count != 0);
    504 
    505 	nowait = flags & B_PAGE_NOWAIT;
    506 
    507 	/*
    508 	 * Clear force flag so that p_lckcnt pages are not invalidated.
    509 	 */
    510 	flags &= ~(B_FORCE | B_PAGE_NOWAIT);
    511 
    512 	SWAPFS_PRINT(SWAP_VOPS,
    513 	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
    514 	    (void *)vp, off, len, flags, 0);
    515 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
    516 	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
    517 
    518 	if (vp->v_flag & VNOMAP)
    519 		return (ENOSYS);
    520 
    521 	if (!vn_has_cached_data(vp))
    522 		return (0);
    523 
    524 	if (len == 0) {
    525 		if (curproc == proc_pageout)
    526 			cmn_err(CE_PANIC, "swapfs: pageout can't block");
    527 
    528 		/* Search the entire vp list for pages >= off. */
    529 		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
    530 		    flags, cr);
    531 	} else {
    532 		u_offset_t eoff;
    533 
    534 		/*
    535 		 * Loop over all offsets in the range [off...off + len]
    536 		 * looking for pages to deal with.
    537 		 */
    538 		eoff = off + len;
    539 		for (io_off = (u_offset_t)off; io_off < eoff;
    540 		    io_off += io_len) {
    541 			/*
    542 			 * If we run out of the async req slot, put the page
    543 			 * now instead of queuing.
    544 			 */
    545 			if (flags == (B_ASYNC | B_FREE) &&
    546 			    sw_pending_size < klustsize &&
    547 			    (arg = sw_getfree())) {
    548 				/*
    549 				 * If we are clustering, we should allow
    550 				 * pageout to feed us more pages because # of
    551 				 * pushes is limited by # of I/Os, and one
    552 				 * cluster is considered to be one I/O.
    553 				 */
    554 				if (pushes)
    555 					pushes--;
    556 
    557 				arg->a_vp = vp;
    558 				arg->a_off = io_off;
    559 				arg->a_len = PAGESIZE;
    560 				arg->a_flags = B_ASYNC | B_FREE;
    561 				arg->a_cred = kcred;
    562 				sw_putreq(arg);
    563 				io_len = PAGESIZE;
    564 				continue;
    565 			}
    566 			/*
    567 			 * If we are not invalidating pages, use the
    568 			 * routine page_lookup_nowait() to prevent
    569 			 * reclaiming them from the free list.
    570 			 */
    571 			if (!nowait && ((flags & B_INVAL) ||
    572 			    (flags & (B_ASYNC | B_FREE)) == B_FREE))
    573 				pp = page_lookup(vp, io_off, SE_EXCL);
    574 			else
    575 				pp = page_lookup_nowait(vp, io_off,
    576 				    (flags & (B_FREE | B_INVAL)) ?
    577 				    SE_EXCL : SE_SHARED);
    578 
    579 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
    580 				io_len = PAGESIZE;
    581 			else {
    582 				err = swap_putapage(vp, pp, &io_off, &io_len,
    583 				    flags, cr);
    584 				if (err != 0)
    585 					break;
    586 			}
    587 		}
    588 	}
    589 	/* If invalidating, verify all pages on vnode list are gone. */
    590 	if (err == 0 && off == 0 && len == 0 &&
    591 	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
    592 		cmn_err(CE_WARN,
    593 		    "swap_putpage: B_INVAL, pages not gone");
    594 	}
    595 	return (err);
    596 }
    597 
    598 /*
    599  * Write out a single page.
    600  * For swapfs this means choose a physical swap slot and write the page
    601  * out using VOP_PAGEIO.
    602  * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
    603  * swapfs pages, a bunch of contiguous swap slots and then write them
    604  * all out in one clustered i/o.
    605  */
    606 /*ARGSUSED*/
    607 static int
    608 swap_putapage(
    609 	struct vnode *vp,
    610 	page_t *pp,
    611 	u_offset_t *offp,
    612 	size_t *lenp,
    613 	int flags,
    614 	struct cred *cr)
    615 {
    616 	int err;
    617 	struct vnode *pvp;
    618 	u_offset_t poff, off;
    619 	u_offset_t doff;
    620 	size_t dlen;
    621 	size_t klsz = 0;
    622 	u_offset_t klstart = 0;
    623 	struct vnode *klvp = NULL;
    624 	page_t *pplist;
    625 	se_t se;
    626 	struct async_reqs *arg;
    627 	size_t swap_klustsize;
    628 
    629 	/*
    630 	 * This check is added for callers who access swap_putpage with len = 0.
    631 	 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
    632 	 * And it's necessary to do the same queuing if users have the same
    633 	 * B_ASYNC|B_FREE flags on.
    634 	 */
    635 	if (flags == (B_ASYNC | B_FREE) &&
    636 	    sw_pending_size < klustsize && (arg = sw_getfree())) {
    637 
    638 		hat_setmod(pp);
    639 		page_io_unlock(pp);
    640 		page_unlock(pp);
    641 
    642 		arg->a_vp = vp;
    643 		arg->a_off = pp->p_offset;
    644 		arg->a_len = PAGESIZE;
    645 		arg->a_flags = B_ASYNC | B_FREE;
    646 		arg->a_cred = kcred;
    647 		sw_putreq(arg);
    648 
    649 		return (0);
    650 	}
    651 
    652 	SWAPFS_PRINT(SWAP_PUTP,
    653 	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
    654 	    pp, vp, pp->p_offset, flags, 0);
    655 
    656 	ASSERT(PAGE_LOCKED(pp));
    657 
    658 	off = pp->p_offset;
    659 
    660 	doff = off;
    661 	dlen = PAGESIZE;
    662 
    663 	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
    664 		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
    665 		hat_setmod(pp);
    666 		page_io_unlock(pp);
    667 		page_unlock(pp);
    668 		goto out;
    669 	}
    670 
    671 	klvp = pvp;
    672 	klstart = poff;
    673 	pplist = pp;
    674 	/*
    675 	 * If this is ASYNC | FREE and we've accumulated a bunch of such
    676 	 * pending requests, kluster.
    677 	 */
    678 	if (flags == (B_ASYNC | B_FREE))
    679 		swap_klustsize = klustsize;
    680 	else
    681 		swap_klustsize = PAGESIZE;
    682 	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
    683 	klsz = PAGESIZE;
    684 	while (klsz < swap_klustsize) {
    685 		if ((arg = sw_getreq()) == NULL) {
    686 			swap_getiofail++;
    687 			swap_getiopages += btop(klsz);
    688 			break;
    689 		}
    690 		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
    691 		vp = arg->a_vp;
    692 		off = arg->a_off;
    693 
    694 		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
    695 			swap_otherfail++;
    696 			swap_otherpages += btop(klsz);
    697 			sw_putfree(arg);
    698 			break;
    699 		}
    700 		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
    701 			sw_putfree(arg);
    702 			continue;
    703 		}
    704 		/* Get new physical backing store for the page */
    705 		doff = off;
    706 		dlen = PAGESIZE;
    707 		if (err = swap_newphysname(vp, off, &doff, &dlen,
    708 		    &pvp, &poff)) {
    709 			swap_otherfail++;
    710 			swap_otherpages += btop(klsz);
    711 			hat_setmod(pp);
    712 			page_io_unlock(pp);
    713 			page_unlock(pp);
    714 			sw_putbackreq(arg);
    715 			break;
    716 		}
    717 		/* Try to cluster new physical name with previous ones */
    718 		if (klvp == pvp && poff == klstart + klsz) {
    719 			klsz += PAGESIZE;
    720 			page_add(&pplist, pp);
    721 			pplist = pplist->p_next;
    722 			sw_putfree(arg);
    723 		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
    724 			klsz += PAGESIZE;
    725 			klstart -= PAGESIZE;
    726 			page_add(&pplist, pp);
    727 			sw_putfree(arg);
    728 		} else {
    729 			swap_klustfail++;
    730 			swap_klustpages += btop(klsz);
    731 			hat_setmod(pp);
    732 			page_io_unlock(pp);
    733 			page_unlock(pp);
    734 			sw_putbackreq(arg);
    735 			break;
    736 		}
    737 	}
    738 
    739 	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
    740 	    B_WRITE | flags, cr, NULL);
    741 
    742 	if ((flags & B_ASYNC) == 0)
    743 		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
    744 
    745 	/* Statistics */
    746 	if (!err) {
    747 		swap_putpagecnt++;
    748 		swap_pagespushed += btop(klsz);
    749 	}
    750 out:
    751 	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
    752 	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
    753 	    vp, klvp, klstart, klsz);
    754 	if (err && err != ENOMEM)
    755 		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
    756 	if (lenp)
    757 		*lenp = PAGESIZE;
    758 	return (err);
    759 }
    760 
    761 static void
    762 swap_dispose(
    763 	vnode_t *vp,
    764 	page_t *pp,
    765 	int fl,
    766 	int dn,
    767 	cred_t *cr,
    768 	caller_context_t *ct)
    769 {
    770 	int err;
    771 	u_offset_t off = pp->p_offset;
    772 	vnode_t *pvp;
    773 	u_offset_t poff;
    774 
    775 	ASSERT(PAGE_EXCL(pp));
    776 
    777 	/*
    778 	 * The caller will free/invalidate large page in one shot instead of
    779 	 * one small page at a time.
    780 	 */
    781 	if (pp->p_szc != 0) {
    782 		page_unlock(pp);
    783 		return;
    784 	}
    785 
    786 	err = swap_getphysname(vp, off, &pvp, &poff);
    787 	if (!err && pvp != NULL)
    788 		VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
    789 	else
    790 		fs_dispose(vp, pp, fl, dn, cr, ct);
    791 }
    792