Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *  	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 
     32 #include <sys/param.h>
     33 #include <sys/types.h>
     34 #include <sys/systm.h>
     35 #include <sys/cred.h>
     36 #include <sys/proc.h>
     37 #include <sys/user.h>
     38 #include <sys/time.h>
     39 #include <sys/buf.h>
     40 #include <sys/vfs.h>
     41 #include <sys/vnode.h>
     42 #include <sys/socket.h>
     43 #include <sys/uio.h>
     44 #include <sys/tiuser.h>
     45 #include <sys/swap.h>
     46 #include <sys/errno.h>
     47 #include <sys/debug.h>
     48 #include <sys/kmem.h>
     49 #include <sys/kstat.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/vtrace.h>
     52 #include <sys/session.h>
     53 #include <sys/dnlc.h>
     54 #include <sys/bitmap.h>
     55 #include <sys/acl.h>
     56 #include <sys/ddi.h>
     57 #include <sys/pathname.h>
     58 #include <sys/flock.h>
     59 #include <sys/dirent.h>
     60 #include <sys/flock.h>
     61 #include <sys/callb.h>
     62 #include <sys/sdt.h>
     63 
     64 #include <rpc/types.h>
     65 #include <rpc/xdr.h>
     66 #include <rpc/auth.h>
     67 #include <rpc/rpcsec_gss.h>
     68 #include <rpc/clnt.h>
     69 
     70 #include <nfs/nfs.h>
     71 #include <nfs/nfs_clnt.h>
     72 #include <nfs/nfs_acl.h>
     73 
     74 #include <nfs/nfs4.h>
     75 #include <nfs/rnode4.h>
     76 #include <nfs/nfs4_clnt.h>
     77 
     78 /*
     79  * The hash queues for the access to active and cached rnodes
     80  * are organized as doubly linked lists.  A reader/writer lock
     81  * for each hash bucket is used to control access and to synchronize
     82  * lookups, additions, and deletions from the hash queue.
     83  *
     84  * The rnode freelist is organized as a doubly linked list with
     85  * a head pointer.  Additions and deletions are synchronized via
     86  * a single mutex.
     87  *
     88  * In order to add an rnode to the free list, it must be hashed into
     89  * a hash queue and the exclusive lock to the hash queue be held.
     90  * If an rnode is not hashed into a hash queue, then it is destroyed
     91  * because it represents no valuable information that can be reused
     92  * about the file.  The exclusive lock to the hash queue must be
     93  * held in order to prevent a lookup in the hash queue from finding
     94  * the rnode and using it and assuming that the rnode is not on the
     95  * freelist.  The lookup in the hash queue will have the hash queue
     96  * locked, either exclusive or shared.
     97  *
     98  * The vnode reference count for each rnode is not allowed to drop
     99  * below 1.  This prevents external entities, such as the VM
    100  * subsystem, from acquiring references to vnodes already on the
    101  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
    103  * rnode is looked up in the hash queues, then either the rnode
    104  * is removed from the freelist and that reference is transferred to
    105  * the new reference or the vnode reference count must be incremented
    106  * accordingly.  The mutex for the freelist must be held in order to
    107  * accurately test to see if the rnode is on the freelist or not.
    108  * The hash queue lock might be held shared and it is possible that
    109  * two different threads may race to remove the rnode from the
    110  * freelist.  This race can be resolved by holding the mutex for the
    111  * freelist.  Please note that the mutex for the freelist does not
    112  * need to be held if the rnode is not on the freelist.  It can not be
    113  * placed on the freelist due to the requirement that the thread
    114  * putting the rnode on the freelist must hold the exclusive lock
    115  * to the hash queue and the thread doing the lookup in the hash
    116  * queue is holding either a shared or exclusive lock to the hash
    117  * queue.
    118  *
    119  * The lock ordering is:
    120  *
    121  *	hash bucket lock -> vnode lock
    122  *	hash bucket lock -> freelist lock -> r_statelock
    123  */
/* Array of rnode hash buckets; indexed by the value rtable4hash() returns. */
r4hashq_t *rtable4;

/* Held around every manipulation of the rnode freelist. */
static kmutex_t rp4freelist_lock;
/* Head of the circular, doubly linked rnode freelist; NULL when empty. */
static rnode4_t *rp4freelist = NULL;
/*
 * Bumped each time make_rnode4() allocates a fresh rnode; compared
 * against nrnode to choose between recycling and allocating.
 */
static long rnode4_new = 0;
/* NOTE(review): presumably the number of buckets in rtable4 -- confirm. */
int rtable4size;
/* Mask rtable4hash() applies to turn a hash value into a bucket index. */
static int rtable4mask;
/* kmem cache that make_rnode4() allocates new rnode4_t structures from. */
static struct kmem_cache *rnode4_cache;
/* NOTE(review): not referenced in this part of the file -- confirm use. */
static int rnode4_hashlen = 4;

/* Forward declarations. */
static void	r4inactive(rnode4_t *, cred_t *);
static vnode_t	*make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
		    struct vnodeops *,
		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
		    cred_t *),
		    int *, cred_t *);
static void	rp4_rmfree(rnode4_t *);
int		nfs4_free_data_reclaim(rnode4_t *);
static int	nfs4_active_data_reclaim(rnode4_t *);
static int	nfs4_free_reclaim(void);
static int	nfs4_active_reclaim(void);
static int	nfs4_rnode_reclaim(void);
static void	nfs4_reclaim(void *);
static int	isrootfh(nfs4_sharedfh_t *, rnode4_t *);
static void	uninit_rnode4(rnode4_t *);
static void	destroy_rnode4(rnode4_t *);
static void	r4_stub_set(rnode4_t *, nfs4_stub_type_t);

#ifdef DEBUG
static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
static int nfs4_rnode_debug = 0;
/* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
static int nfs4_rnode_nofreelist = 0;
/* give messages on colliding shared filehandles */
static void	r4_dup_check(rnode4_t *, vfs_t *);
#endif
    160 
    161 /*
    162  * If the vnode has pages, run the list and check for any that are
    163  * still dangling.  We call this routine before putting an rnode on
    164  * the free list.
    165  */
    166 static int
    167 nfs4_dross_pages(vnode_t *vp)
    168 {
    169 	page_t *pp;
    170 	kmutex_t *vphm;
    171 
    172 	vphm = page_vnode_mutex(vp);
    173 	mutex_enter(vphm);
    174 	if ((pp = vp->v_pages) != NULL) {
    175 		do {
    176 			if (pp->p_fsdata != C_NOCOMMIT) {
    177 				mutex_exit(vphm);
    178 				return (1);
    179 			}
    180 		} while ((pp = pp->p_vpnext) != vp->v_pages);
    181 	}
    182 	mutex_exit(vphm);
    183 
    184 	return (0);
    185 }
    186 
    187 /*
    188  * Flush any pages left on this rnode.
    189  */
    190 static void
    191 r4flushpages(rnode4_t *rp, cred_t *cr)
    192 {
    193 	vnode_t *vp;
    194 	int error;
    195 
    196 	/*
    197 	 * Before freeing anything, wait until all asynchronous
    198 	 * activity is done on this rnode.  This will allow all
    199 	 * asynchronous read ahead and write behind i/o's to
    200 	 * finish.
    201 	 */
    202 	mutex_enter(&rp->r_statelock);
    203 	while (rp->r_count > 0)
    204 		cv_wait(&rp->r_cv, &rp->r_statelock);
    205 	mutex_exit(&rp->r_statelock);
    206 
    207 	/*
    208 	 * Flush and invalidate all pages associated with the vnode.
    209 	 */
    210 	vp = RTOV4(rp);
    211 	if (nfs4_has_pages(vp)) {
    212 		ASSERT(vp->v_type != VCHR);
    213 		if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
    214 			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
    215 			if (error && (error == ENOSPC || error == EDQUOT)) {
    216 				mutex_enter(&rp->r_statelock);
    217 				if (!rp->r_error)
    218 					rp->r_error = error;
    219 				mutex_exit(&rp->r_statelock);
    220 			}
    221 		}
    222 		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
    223 	}
    224 }
    225 
    226 /*
    227  * Free the resources associated with an rnode.
    228  */
    229 static void
    230 r4inactive(rnode4_t *rp, cred_t *cr)
    231 {
    232 	vnode_t *vp;
    233 	char *contents;
    234 	int size;
    235 	vsecattr_t *vsp;
    236 	vnode_t *xattr;
    237 
    238 	r4flushpages(rp, cr);
    239 
    240 	vp = RTOV4(rp);
    241 
    242 	/*
    243 	 * Free any held caches which may be
    244 	 * associated with this rnode.
    245 	 */
    246 	mutex_enter(&rp->r_statelock);
    247 	contents = rp->r_symlink.contents;
    248 	size = rp->r_symlink.size;
    249 	rp->r_symlink.contents = NULL;
    250 	vsp = rp->r_secattr;
    251 	rp->r_secattr = NULL;
    252 	xattr = rp->r_xattr_dir;
    253 	rp->r_xattr_dir = NULL;
    254 	mutex_exit(&rp->r_statelock);
    255 
    256 	/*
    257 	 * Free the access cache entries.
    258 	 */
    259 	(void) nfs4_access_purge_rp(rp);
    260 
    261 	/*
    262 	 * Free the readdir cache entries.
    263 	 */
    264 	nfs4_purge_rddir_cache(vp);
    265 
    266 	/*
    267 	 * Free the symbolic link cache.
    268 	 */
    269 	if (contents != NULL) {
    270 
    271 		kmem_free((void *)contents, size);
    272 	}
    273 
    274 	/*
    275 	 * Free any cached ACL.
    276 	 */
    277 	if (vsp != NULL)
    278 		nfs4_acl_free_cache(vsp);
    279 
    280 	/*
    281 	 * Release the cached xattr_dir
    282 	 */
    283 	if (xattr != NULL)
    284 		VN_RELE(xattr);
    285 }
    286 
    287 /*
    288  * We have seen a case that the fh passed in is for "." which
    289  * should be a VROOT node, however, the fh is different from the
    290  * root fh stored in the mntinfo4_t. The invalid fh might be
    291  * from a misbehaved server and will panic the client system at
    292  * a later time. To avoid the panic, we drop the bad fh, use
    293  * the root fh from mntinfo4_t, and print an error message
    294  * for attention.
    295  */
    296 nfs4_sharedfh_t *
    297 badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
    298     int *wasbad)
    299 {
    300 	char *s;
    301 
    302 	*wasbad = 0;
    303 	s = fn_name(nm);
    304 	ASSERT(strcmp(s, "..") != 0);
    305 
    306 	if ((s[0] == '.' && s[1] == '\0') && fh &&
    307 	    !SFH4_SAME(mi->mi_rootfh, fh)) {
    308 #ifdef DEBUG
    309 		nfs4_fhandle_t fhandle;
    310 
    311 		zcmn_err(mi->mi_zone->zone_id, CE_WARN,
    312 		    "Server %s returns a different "
    313 		    "root filehandle for the path %s:",
    314 		    mi->mi_curr_serv->sv_hostname,
    315 		    mi->mi_curr_serv->sv_path);
    316 
    317 		/* print the bad fh */
    318 		fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
    319 		bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
    320 		    fhandle.fh_len);
    321 		nfs4_printfhandle(&fhandle);
    322 
    323 		/* print mi_rootfh */
    324 		fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
    325 		bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
    326 		    fhandle.fh_len);
    327 		nfs4_printfhandle(&fhandle);
    328 #endif
    329 		/* use mi_rootfh instead; fh will be rele by the caller */
    330 		fh = mi->mi_rootfh;
    331 		*wasbad = 1;
    332 	}
    333 
    334 	kmem_free(s, MAXNAMELEN);
    335 	return (fh);
    336 }
    337 
    338 void
    339 r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
    340     hrtime_t t, cred_t *cr, int index)
    341 {
    342 	int is_stub;
    343 	vattr_t *attr;
    344 	/*
    345 	 * Don't add to attrcache if time overflow, but
    346 	 * no need to check because either attr is null or the time
    347 	 * values in it were processed by nfs4_time_ntov(), which checks
    348 	 * for time overflows.
    349 	 */
    350 	attr = garp ? &garp->n4g_va : NULL;
    351 
    352 	if (attr) {
    353 		if (!newnode) {
    354 			rw_exit(&rtable4[index].r_lock);
    355 #ifdef DEBUG
    356 			if (vp->v_type != attr->va_type &&
    357 			    vp->v_type != VNON && attr->va_type != VNON) {
    358 				zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
    359 				    "makenfs4node: type (%d) doesn't "
    360 				    "match type of found node at %p (%d)",
    361 				    attr->va_type, (void *)vp, vp->v_type);
    362 			}
    363 #endif
    364 			nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
    365 		} else {
    366 			rnode4_t *rp = VTOR4(vp);
    367 
    368 			vp->v_type = attr->va_type;
    369 			vp->v_rdev = attr->va_rdev;
    370 
    371 			/*
    372 			 * Turn this object into a "stub" object if we
    373 			 * crossed an underlying server fs boundary.
    374 			 * To make this check, during mount we save the
    375 			 * fsid of the server object being mounted.
    376 			 * Here we compare this object's server fsid
    377 			 * with the fsid we saved at mount.  If they
    378 			 * are different, we crossed server fs boundary.
    379 			 *
    380 			 * The stub type is set (or not) at rnode
    381 			 * creation time and it never changes for life
    382 			 * of the rnode.
    383 			 *
    384 			 * The stub type is also set during RO failover,
    385 			 * nfs4_remap_file().
    386 			 *
    387 			 * This stub will be for a mirror-mount.
    388 			 *
    389 			 * We don't bother with taking r_state_lock to
    390 			 * set the stub type because this is a new rnode
    391 			 * and we're holding the hash bucket r_lock RW_WRITER.
    392 			 * No other thread could have obtained access
    393 			 * to this rnode.
    394 			 */
    395 			is_stub = 0;
    396 			if (garp->n4g_fsid_valid) {
    397 				fattr4_fsid ga_fsid = garp->n4g_fsid;
    398 				servinfo4_t *svp = rp->r_server;
    399 
    400 				rp->r_srv_fsid = ga_fsid;
    401 
    402 				(void) nfs_rw_enter_sig(&svp->sv_lock,
    403 				    RW_READER, 0);
    404 				if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
    405 					is_stub = 1;
    406 				nfs_rw_exit(&svp->sv_lock);
    407 			}
    408 
    409 			if (is_stub)
    410 				r4_stub_mirrormount(rp);
    411 			else
    412 				r4_stub_none(rp);
    413 
    414 			/* Can not cache partial attr */
    415 			if (attr->va_mask == AT_ALL)
    416 				nfs4_attrcache_noinval(vp, garp, t);
    417 			else
    418 				PURGE_ATTRCACHE4(vp);
    419 
    420 			rw_exit(&rtable4[index].r_lock);
    421 		}
    422 	} else {
    423 		if (newnode) {
    424 			PURGE_ATTRCACHE4(vp);
    425 		}
    426 		rw_exit(&rtable4[index].r_lock);
    427 	}
    428 }
    429 
    430 /*
    431  * Find or create an rnode based primarily on filehandle.  To be
    432  * used when dvp (vnode for parent directory) is not available;
    433  * otherwise, makenfs4node() should be used.
    434  *
    435  * The nfs4_fname_t argument *npp is consumed and nulled out.
    436  */
    437 
    438 vnode_t *
    439 makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
    440     nfs4_fname_t **npp, nfs4_ga_res_t *garp,
    441     mntinfo4_t *mi, cred_t *cr, hrtime_t t)
    442 {
    443 	vfs_t *vfsp = mi->mi_vfsp;
    444 	int newnode = 0;
    445 	vnode_t *vp;
    446 	rnode4_t *rp;
    447 	svnode_t *svp;
    448 	nfs4_fname_t *name, *svpname;
    449 	int index;
    450 
    451 	ASSERT(npp && *npp);
    452 	name = *npp;
    453 	*npp = NULL;
    454 
    455 	index = rtable4hash(sfh);
    456 	rw_enter(&rtable4[index].r_lock, RW_READER);
    457 
    458 	vp = make_rnode4(sfh, &rtable4[index], vfsp,
    459 	    nfs4_vnodeops, nfs4_putapage, &newnode, cr);
    460 
    461 	svp = VTOSV(vp);
    462 	rp = VTOR4(vp);
    463 	if (newnode) {
    464 		svp->sv_forw = svp->sv_back = svp;
    465 		svp->sv_name = name;
    466 		if (psfh != NULL)
    467 			sfh4_hold(psfh);
    468 		svp->sv_dfh = psfh;
    469 	} else {
    470 		/*
    471 		 * It is possible that due to a server
    472 		 * side rename fnames have changed.
    473 		 * update the fname here.
    474 		 */
    475 		mutex_enter(&rp->r_svlock);
    476 		svpname = svp->sv_name;
    477 		if (svp->sv_name != name) {
    478 			svp->sv_name = name;
    479 			mutex_exit(&rp->r_svlock);
    480 			fn_rele(&svpname);
    481 		} else {
    482 			mutex_exit(&rp->r_svlock);
    483 			fn_rele(&name);
    484 		}
    485 	}
    486 
    487 	ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
    488 	r4_do_attrcache(vp, garp, newnode, t, cr, index);
    489 	ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
    490 
    491 	return (vp);
    492 }
    493 
    494 /*
    495  * Find or create a vnode for the given filehandle, filesystem, parent, and
    496  * name.  The reference to nm is consumed, so the caller must first do an
    497  * fn_hold() if it wants to continue using nm after this call.
    498  */
    499 vnode_t *
    500 makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
    501     hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
    502 {
    503 	vnode_t *vp;
    504 	int newnode;
    505 	int index;
    506 	mntinfo4_t *mi = VFTOMI4(vfsp);
    507 	int had_badfh = 0;
    508 	rnode4_t *rp;
    509 
    510 	ASSERT(dvp != NULL);
    511 
    512 	fh = badrootfh_check(fh, nm, mi, &had_badfh);
    513 
    514 	index = rtable4hash(fh);
    515 	rw_enter(&rtable4[index].r_lock, RW_READER);
    516 
    517 	/*
    518 	 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
    519 	 */
    520 	vp = make_rnode4(fh, &rtable4[index], vfsp, nfs4_vnodeops,
    521 	    nfs4_putapage, &newnode, cr);
    522 
    523 	rp = VTOR4(vp);
    524 	sv_activate(&vp, dvp, &nm, newnode);
    525 	if (dvp->v_flag & V_XATTRDIR) {
    526 		mutex_enter(&rp->r_statelock);
    527 		rp->r_flags |= R4ISXATTR;
    528 		mutex_exit(&rp->r_statelock);
    529 	}
    530 
    531 	/* if getting a bad file handle, do not cache the attributes. */
    532 	if (had_badfh) {
    533 		rw_exit(&rtable4[index].r_lock);
    534 		return (vp);
    535 	}
    536 
    537 	ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
    538 	r4_do_attrcache(vp, garp, newnode, t, cr, index);
    539 	ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
    540 
    541 	return (vp);
    542 }
    543 
    544 /*
    545  * Hash on address of filehandle object.
    546  * XXX totally untuned.
    547  */
    548 
    549 int
    550 rtable4hash(nfs4_sharedfh_t *fh)
    551 {
    552 	return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
    553 }
    554 
/*
 * Find or create the vnode for the given filehandle and filesystem.
 * *newnode is set to zero if the vnode already existed; non-zero if it had
 * to be created.
 *
 *	fh       - filehandle to look up; a hold is taken on it when a
 *		   new rnode is set up
 *	rhtp     - hash bucket for fh; its r_lock must be held as reader
 *		   on entry
 *	vfsp     - filesystem the vnode belongs to
 *	vops     - vnode operations installed on a new vnode
 *	putapage - putapage routine stored in a new rnode
 *	newnode  - out: 0 if an existing rnode was found, 1 if created
 *	cr       - credentials used when tearing down a recycled rnode
 *
 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
 * Specifically, when *newnode is 1 the function returns with
 * rhtp->r_lock held RW_WRITER; when an existing rnode is found it
 * returns with the lock held RW_READER.  Either way the caller is
 * left holding the lock and must release it.
 */

static vnode_t *
make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
    struct vnodeops *vops,
    int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
    int *newnode, cred_t *cr)
{
	rnode4_t *rp;
	rnode4_t *trp;
	vnode_t *vp;
	mntinfo4_t *mi;

	ASSERT(RW_READ_HELD(&rhtp->r_lock));

	mi = VFTOMI4(vfsp);

start:
	/* Fast path: the rnode is already hashed in this bucket. */
	if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
		vp = RTOV4(rp);
		*newnode = 0;
		return (vp);
	}
	/* Not found; drop the bucket lock while an rnode is obtained. */
	rw_exit(&rhtp->r_lock);

	mutex_enter(&rp4freelist_lock);

	/*
	 * Recycle the rnode at the head of the freelist once the number
	 * of allocated rnodes has reached nrnode; otherwise allocate a
	 * fresh one.
	 */
	if (rp4freelist != NULL && rnode4_new >= nrnode) {
		rp = rp4freelist;
		rp4_rmfree(rp);
		mutex_exit(&rp4freelist_lock);

		vp = RTOV4(rp);

		if (rp->r_flags & R4HASHED) {
			/*
			 * The victim is still hashed, possibly in another
			 * bucket.  If somebody else holds a reference, give
			 * up ours and start over; otherwise unhash it.
			 */
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				rw_enter(&rhtp->r_lock, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			rp4_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		r4inactive(rp, cr);

		/*
		 * Recheck the reference count: a reference may have been
		 * acquired while v_lock was not held.  If so, drop ours
		 * and start over.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			rw_enter(&rhtp->r_lock, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);

		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		uninit_rnode4(rp);

		/*
		 * Make sure that if rnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		vnode_t *new_vp;

		mutex_exit(&rp4freelist_lock);

		rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_add_long((ulong_t *)&rnode4_new, 1);
#ifdef DEBUG
		clstat4_debug.nrnode.value.ui64++;
#endif
		vp = new_vp;
	}

	/*
	 * Common initialization for both a recycled and a freshly
	 * allocated rnode: start from an all-zero structure and set up
	 * every lock, condition variable and list from scratch.
	 */
	bzero(rp, sizeof (*rp));
	rp->r_vnode = vp;
	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
	rp->created_v4 = 0;
	list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
	    offsetof(nfs4_open_stream_t, os_node));
	/* An empty lock owner list points back at its own head. */
	rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
	rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
	rp->r_flags = R4READDIRWATTR;
	rp->r_fh = fh;
	rp->r_hashq = rhtp;
	sfh4_hold(rp->r_fh);
	rp->r_server = mi->mi_curr_serv;
	rp->r_deleg_type = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);

	rddir4_cache_create(rp);
	rp->r_putapage = putapage;
	vn_setops(vp, vops);
	vp->v_data = (caddr_t)rp;
	vp->v_vfsp = vfsp;
	VFS_HOLD(vfsp);
	vp->v_type = VNON;
	if (isrootfh(fh, rp))
		vp->v_flag = VROOT;
	vn_exists(vp);

	/*
	 * There is a race condition if someone else
	 * alloc's the rnode while no locks are held, so we
	 * check again and recover if found.
	 */
	rw_enter(&rhtp->r_lock, RW_WRITER);
	if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
		/*
		 * Lost the race: use the rnode that is already hashed
		 * and hand ours to rp4_addfree() for disposal.  The
		 * bucket lock is dropped around that call and retaken
		 * as reader before returning.
		 */
		vp = RTOV4(trp);
		*newnode = 0;
		rw_exit(&rhtp->r_lock);
		rp4_addfree(rp, cr);
		rw_enter(&rhtp->r_lock, RW_READER);
		return (vp);
	}
	/* Won the race: hash the new rnode; bucket lock stays exclusive. */
	rp4_addhash(rp);
	*newnode = 1;
	return (vp);
}
    703 
    704 static void
    705 uninit_rnode4(rnode4_t *rp)
    706 {
    707 	vnode_t *vp = RTOV4(rp);
    708 
    709 	ASSERT(rp != NULL);
    710 	ASSERT(vp != NULL);
    711 	ASSERT(vp->v_count == 1);
    712 	ASSERT(rp->r_count == 0);
    713 	ASSERT(rp->r_mapcnt == 0);
    714 	if (rp->r_flags & R4LODANGLERS) {
    715 		nfs4_flush_lock_owners(rp);
    716 	}
    717 	ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
    718 	ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
    719 	ASSERT(!(rp->r_flags & R4HASHED));
    720 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
    721 	nfs4_clear_open_streams(rp);
    722 	list_destroy(&rp->r_open_streams);
    723 
    724 	/*
    725 	 * Destroy the rddir cache first since we need to grab the r_statelock.
    726 	 */
    727 	mutex_enter(&rp->r_statelock);
    728 	rddir4_cache_destroy(rp);
    729 	mutex_exit(&rp->r_statelock);
    730 	sv_uninit(&rp->r_svnode);
    731 	sfh4_rele(&rp->r_fh);
    732 	nfs_rw_destroy(&rp->r_rwlock);
    733 	nfs_rw_destroy(&rp->r_lkserlock);
    734 	mutex_destroy(&rp->r_statelock);
    735 	mutex_destroy(&rp->r_statev4_lock);
    736 	mutex_destroy(&rp->r_os_lock);
    737 	cv_destroy(&rp->r_cv);
    738 	cv_destroy(&rp->r_commit.c_cv);
    739 	nfs_rw_destroy(&rp->r_deleg_recall_lock);
    740 	if (rp->r_flags & R4DELMAPLIST)
    741 		list_destroy(&rp->r_indelmap);
    742 }
    743 
/*
 * Put an rnode on the free list.
 *
 * Rnodes which were allocated above and beyond the normal limit
 * are immediately freed.
 *
 *	rp - rnode being released; the caller's vnode reference is
 *	     consumed on every path
 *	cr - credentials used for flushing pages and closing open files
 *
 * On entry the rnode must not be on the freelist and its vnode must
 * have at least one reference.  Whenever another reference is found
 * (v_count > 1), this one is simply dropped and the function returns,
 * leaving the rest to the remaining holder.
 */
void
rp4_addfree(rnode4_t *rp, cred_t *cr)
{
	vnode_t *vp;
	vnode_t *xattr;
	struct vfs *vfsp;

	vp = RTOV4(rp);
	ASSERT(vp->v_count >= 1);
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);

	/*
	 * If we have too many rnodes allocated and there are no
	 * references to this rnode, or if the rnode is no longer
	 * accessible because it does not reside in the hash queues,
	 * or if an i/o error occurred while writing to the file,
	 * or if recovery failed on it (R4RECOVERR), or if the
	 * filesystem has been unmounted, then just free it instead
	 * of putting it on the rnode freelist.  In every one of these
	 * cases this only happens when r_count is zero.
	 */
	vfsp = vp->v_vfsp;
	if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
#ifdef DEBUG
	    (nfs4_rnode_nofreelist != 0) ||
#endif
	    rp->r_error || (rp->r_flags & R4RECOVERR) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
		if (rp->r_flags & R4HASHED) {
			/*
			 * Unhash the rnode under the exclusive bucket
			 * lock, unless someone else holds a reference.
			 */
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				return;
			}
			mutex_exit(&vp->v_lock);
			rp4_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		/*
		 * Make sure we don't have a delegation on this rnode
		 * before destroying it.
		 */
		if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
			(void) nfs4delegreturn(rp,
			    NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
		}

		r4inactive(rp, cr);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * rnode is not in the rnode hash queues; one
		 * way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the rnode was marked
		 * with R4DIRTY or for a modified page.  This
		 * reference may have been acquired before our call
		 * to r4inactive.  The i/o may have been completed,
		 * thus allowing r4inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the rnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the rnode or placing it on the
		 * rnode freelist.  If there are no other references,
		 * then the rnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		destroy_rnode4(rp);
		return;
	}

	/*
	 * Lock the hash queue and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the rnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this rnode to the freelist.
	 *
	 * Each of the checks below that has to drop the hash queue
	 * lock to do its work comes back here afterwards, so that all
	 * checks are repeated with the lock held.
	 */
again:
	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		rw_exit(&rp->r_hashq->r_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Make sure we don't put an rnode with a delegation
	 * on the free list.
	 */
	if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
		rw_exit(&rp->r_hashq->r_lock);
		(void) nfs4delegreturn(rp,
		    NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
		goto again;
	}

	/*
	 * Now that we have the hash queue lock, and we know there
	 * are not anymore references on the vnode, check to make
	 * sure there aren't any open streams still on the rnode.
	 * If so, drop the hash queue lock, remove the open streams,
	 * and recheck the v_count.
	 *
	 * When running in a zone other than the mount's zone the
	 * open streams are only cleared locally; otherwise the files
	 * are closed through nfs4close_all().
	 */
	mutex_enter(&rp->r_os_lock);
	if (list_head(&rp->r_open_streams) != NULL) {
		mutex_exit(&rp->r_os_lock);
		rw_exit(&rp->r_hashq->r_lock);
		if (nfs_zone() != VTOMI4(vp)->mi_zone)
			nfs4_clear_open_streams(rp);
		else
			(void) nfs4close_all(vp, cr);
		goto again;
	}
	mutex_exit(&rp->r_os_lock);

	/*
	 * Before we put it on the freelist, make sure there are no pages.
	 * If there are, flush and commit all of the dirty and
	 * uncommitted pages, assuming the file system isn't read only.
	 */
	if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
		rw_exit(&rp->r_hashq->r_lock);
		r4flushpages(rp, cr);
		goto again;
	}

	/*
	 * Before we put it on the freelist, make sure there is no
	 * active xattr directory cached, the freelist will not
	 * have its entries r4inactive'd if there is still an active
	 * rnode, thus nothing in the freelist can hold another
	 * rnode active.
	 *
	 * NOTE(review): r_xattr_dir is cleared here without taking
	 * r_statelock, whereas r4inactive() clears it under that
	 * lock -- confirm the hash queue lock is sufficient here.
	 */
	xattr = rp->r_xattr_dir;
	rp->r_xattr_dir = NULL;

	/*
	 * The rnode is linked in just behind the current head of the
	 * circular freelist, i.e. at the tail.
	 *
	 * If there is no cached data or metadata for this file, then
	 * put the rnode on the front of the freelist so that it will
	 * be reused before other rnodes which may have cached data or
	 * metadata associated with them.  This is done by moving the
	 * head pointer onto the newly linked rnode.
	 */
	mutex_enter(&rp4freelist_lock);
	if (rp4freelist == NULL) {
		rp->r_freef = rp;
		rp->r_freeb = rp;
		rp4freelist = rp;
	} else {
		rp->r_freef = rp4freelist;
		rp->r_freeb = rp4freelist->r_freeb;
		rp4freelist->r_freeb->r_freef = rp;
		rp4freelist->r_freeb = rp;
		if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
		    rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
			rp4freelist = rp;
	}
	mutex_exit(&rp4freelist_lock);

	rw_exit(&rp->r_hashq->r_lock);

	/* Drop the xattr directory reference only after all locks are gone. */
	if (xattr)
		VN_RELE(xattr);
}
    930 
    931 /*
    932  * Remove an rnode from the free list.
    933  *
    934  * The caller must be holding rp4freelist_lock and the rnode
    935  * must be on the freelist.
    936  */
    937 static void
    938 rp4_rmfree(rnode4_t *rp)
    939 {
    940 
    941 	ASSERT(MUTEX_HELD(&rp4freelist_lock));
    942 	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
    943 
    944 	if (rp == rp4freelist) {
    945 		rp4freelist = rp->r_freef;
    946 		if (rp == rp4freelist)
    947 			rp4freelist = NULL;
    948 	}
    949 	rp->r_freeb->r_freef = rp->r_freef;
    950 	rp->r_freef->r_freeb = rp->r_freeb;
    951 
    952 	rp->r_freef = rp->r_freeb = NULL;
    953 }
    954 
    955 /*
    956  * Put a rnode in the hash table.
    957  *
    958  * The caller must be holding the exclusive hash queue lock
    959  */
    960 void
    961 rp4_addhash(rnode4_t *rp)
    962 {
    963 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
    964 	ASSERT(!(rp->r_flags & R4HASHED));
    965 
    966 #ifdef DEBUG
    967 	r4_dup_check(rp, RTOV4(rp)->v_vfsp);
    968 #endif
    969 
    970 	rp->r_hashf = rp->r_hashq->r_hashf;
    971 	rp->r_hashq->r_hashf = rp;
    972 	rp->r_hashb = (rnode4_t *)rp->r_hashq;
    973 	rp->r_hashf->r_hashb = rp;
    974 
    975 	mutex_enter(&rp->r_statelock);
    976 	rp->r_flags |= R4HASHED;
    977 	mutex_exit(&rp->r_statelock);
    978 }
    979 
    980 /*
    981  * Remove a rnode from the hash table.
    982  *
    983  * The caller must be holding the hash queue lock.
    984  */
    985 void
    986 rp4_rmhash_locked(rnode4_t *rp)
    987 {
    988 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
    989 	ASSERT(rp->r_flags & R4HASHED);
    990 
    991 	rp->r_hashb->r_hashf = rp->r_hashf;
    992 	rp->r_hashf->r_hashb = rp->r_hashb;
    993 
    994 	mutex_enter(&rp->r_statelock);
    995 	rp->r_flags &= ~R4HASHED;
    996 	mutex_exit(&rp->r_statelock);
    997 }
    998 
    999 /*
   1000  * Remove a rnode from the hash table.
   1001  *
   1002  * The caller must not be holding the hash queue lock.
   1003  */
   1004 void
   1005 rp4_rmhash(rnode4_t *rp)
   1006 {
   1007 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
   1008 	rp4_rmhash_locked(rp);
   1009 	rw_exit(&rp->r_hashq->r_lock);
   1010 }
   1011 
   1012 /*
   1013  * Lookup a rnode by fhandle.  Ignores rnodes that had failed recovery.
   1014  * Returns NULL if no match.  If an rnode is returned, the reference count
   1015  * on the master vnode is incremented.
   1016  *
   1017  * The caller must be holding the hash queue lock, either shared or exclusive.
   1018  */
   1019 rnode4_t *
   1020 r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
   1021 {
   1022 	rnode4_t *rp;
   1023 	vnode_t *vp;
   1024 
   1025 	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
   1026 
   1027 	for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
   1028 		vp = RTOV4(rp);
   1029 		if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {
   1030 
   1031 			mutex_enter(&rp->r_statelock);
   1032 			if (rp->r_flags & R4RECOVERR) {
   1033 				mutex_exit(&rp->r_statelock);
   1034 				continue;
   1035 			}
   1036 			mutex_exit(&rp->r_statelock);
   1037 #ifdef DEBUG
   1038 			r4_dup_check(rp, vfsp);
   1039 #endif
   1040 			if (rp->r_freef != NULL) {
   1041 				mutex_enter(&rp4freelist_lock);
   1042 				/*
   1043 				 * If the rnode is on the freelist,
   1044 				 * then remove it and use that reference
   1045 				 * as the new reference.  Otherwise,
   1046 				 * need to increment the reference count.
   1047 				 */
   1048 				if (rp->r_freef != NULL) {
   1049 					rp4_rmfree(rp);
   1050 					mutex_exit(&rp4freelist_lock);
   1051 				} else {
   1052 					mutex_exit(&rp4freelist_lock);
   1053 					VN_HOLD(vp);
   1054 				}
   1055 			} else
   1056 				VN_HOLD(vp);
   1057 
   1058 			/*
   1059 			 * if root vnode, set v_flag to indicate that
   1060 			 */
   1061 			if (isrootfh(fh, rp)) {
   1062 				if (!(vp->v_flag & VROOT)) {
   1063 					mutex_enter(&vp->v_lock);
   1064 					vp->v_flag |= VROOT;
   1065 					mutex_exit(&vp->v_lock);
   1066 				}
   1067 			}
   1068 			return (rp);
   1069 		}
   1070 	}
   1071 	return (NULL);
   1072 }
   1073 
   1074 /*
   1075  * Lookup an rnode by fhandle. Just a wrapper for r4find()
   1076  * that assumes the caller hasn't already got the lock
   1077  * on the hash bucket.
   1078  */
   1079 rnode4_t *
   1080 r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
   1081 {
   1082 	rnode4_t *rp;
   1083 	int index;
   1084 
   1085 	index = rtable4hash(fh);
   1086 	rw_enter(&rtable4[index].r_lock, RW_READER);
   1087 	rp = r4find(&rtable4[index], fh, vfsp);
   1088 	rw_exit(&rtable4[index].r_lock);
   1089 
   1090 	return (rp);
   1091 }
   1092 
   1093 /*
   1094  * Return >0 if there is a active vnode belonging to this vfs in the
   1095  * rtable4 cache.
   1096  *
   1097  * Several of these checks are done without holding the usual
   1098  * locks.  This is safe because destroy_rtable(), rp_addfree(),
   1099  * etc. will redo the necessary checks before actually destroying
   1100  * any rnodes.
   1101  */
   1102 int
   1103 check_rtable4(struct vfs *vfsp)
   1104 {
   1105 	rnode4_t *rp;
   1106 	vnode_t *vp;
   1107 	int busy = NFSV4_RTABLE4_OK;
   1108 	int index;
   1109 
   1110 	for (index = 0; index < rtable4size; index++) {
   1111 		rw_enter(&rtable4[index].r_lock, RW_READER);
   1112 
   1113 		for (rp = rtable4[index].r_hashf;
   1114 		    rp != (rnode4_t *)(&rtable4[index]);
   1115 		    rp = rp->r_hashf) {
   1116 
   1117 			vp = RTOV4(rp);
   1118 			if (vp->v_vfsp == vfsp) {
   1119 				if (rp->r_freef == NULL) {
   1120 					busy = NFSV4_RTABLE4_NOT_FREE_LIST;
   1121 				} else if (nfs4_has_pages(vp) &&
   1122 				    (rp->r_flags & R4DIRTY)) {
   1123 					busy = NFSV4_RTABLE4_DIRTY_PAGES;
   1124 				} else if (rp->r_count > 0) {
   1125 					busy = NFSV4_RTABLE4_POS_R_COUNT;
   1126 				}
   1127 
   1128 				if (busy != NFSV4_RTABLE4_OK) {
   1129 #ifdef DEBUG
   1130 					char *path;
   1131 
   1132 					path = fn_path(rp->r_svnode.sv_name);
   1133 					DTRACE_NFSV4_3(rnode__e__debug,
   1134 					    int, busy, char *, path,
   1135 					    rnode4_t *, rp);
   1136 					kmem_free(path, strlen(path)+1);
   1137 #endif
   1138 					rw_exit(&rtable4[index].r_lock);
   1139 					return (busy);
   1140 				}
   1141 			}
   1142 		}
   1143 		rw_exit(&rtable4[index].r_lock);
   1144 	}
   1145 	return (busy);
   1146 }
   1147 
   1148 /*
   1149  * Destroy inactive vnodes from the hash queues which
   1150  * belong to this vfs. All of the vnodes should be inactive.
   1151  * It is essential that we destroy all rnodes in case of
   1152  * forced unmount as well as in normal unmount case.
   1153  */
   1154 
   1155 void
   1156 destroy_rtable4(struct vfs *vfsp, cred_t *cr)
   1157 {
   1158 	int index;
   1159 	vnode_t *vp;
   1160 	rnode4_t *rp, *r_hashf, *rlist;
   1161 
   1162 	rlist = NULL;
   1163 
   1164 	for (index = 0; index < rtable4size; index++) {
   1165 		rw_enter(&rtable4[index].r_lock, RW_WRITER);
   1166 		for (rp = rtable4[index].r_hashf;
   1167 		    rp != (rnode4_t *)(&rtable4[index]);
   1168 		    rp = r_hashf) {
   1169 			/* save the hash pointer before destroying */
   1170 			r_hashf = rp->r_hashf;
   1171 
   1172 			vp = RTOV4(rp);
   1173 			if (vp->v_vfsp == vfsp) {
   1174 				mutex_enter(&rp4freelist_lock);
   1175 				if (rp->r_freef != NULL) {
   1176 					rp4_rmfree(rp);
   1177 					mutex_exit(&rp4freelist_lock);
   1178 					rp4_rmhash_locked(rp);
   1179 					rp->r_hashf = rlist;
   1180 					rlist = rp;
   1181 				} else
   1182 					mutex_exit(&rp4freelist_lock);
   1183 			}
   1184 		}
   1185 		rw_exit(&rtable4[index].r_lock);
   1186 	}
   1187 
   1188 	for (rp = rlist; rp != NULL; rp = r_hashf) {
   1189 		r_hashf = rp->r_hashf;
   1190 		/*
   1191 		 * This call to rp4_addfree will end up destroying the
   1192 		 * rnode, but in a safe way with the appropriate set
   1193 		 * of checks done.
   1194 		 */
   1195 		rp4_addfree(rp, cr);
   1196 	}
   1197 }
   1198 
   1199 /*
   1200  * This routine destroys all the resources of an rnode
   1201  * and finally the rnode itself.
   1202  */
   1203 static void
   1204 destroy_rnode4(rnode4_t *rp)
   1205 {
   1206 	vnode_t *vp;
   1207 	vfs_t *vfsp;
   1208 
   1209 	ASSERT(rp->r_deleg_type == OPEN_DELEGATE_NONE);
   1210 
   1211 	vp = RTOV4(rp);
   1212 	vfsp = vp->v_vfsp;
   1213 
   1214 	uninit_rnode4(rp);
   1215 	atomic_add_long((ulong_t *)&rnode4_new, -1);
   1216 #ifdef DEBUG
   1217 	clstat4_debug.nrnode.value.ui64--;
   1218 #endif
   1219 	kmem_cache_free(rnode4_cache, rp);
   1220 	vn_invalid(vp);
   1221 	vn_free(vp);
   1222 	VFS_RELE(vfsp);
   1223 }
   1224 
   1225 /*
   1226  * Invalidate the attributes on all rnodes forcing the next getattr
   1227  * to go over the wire.  Used to flush stale uid and gid mappings.
   1228  * Maybe done on a per vfsp, or all rnodes (vfsp == NULL)
   1229  */
   1230 void
   1231 nfs4_rnode_invalidate(struct vfs *vfsp)
   1232 {
   1233 	int index;
   1234 	rnode4_t *rp;
   1235 	vnode_t *vp;
   1236 
   1237 	/*
   1238 	 * Walk the hash queues looking for rnodes.
   1239 	 */
   1240 	for (index = 0; index < rtable4size; index++) {
   1241 		rw_enter(&rtable4[index].r_lock, RW_READER);
   1242 		for (rp = rtable4[index].r_hashf;
   1243 		    rp != (rnode4_t *)(&rtable4[index]);
   1244 		    rp = rp->r_hashf) {
   1245 			vp = RTOV4(rp);
   1246 			if (vfsp != NULL && vp->v_vfsp != vfsp)
   1247 				continue;
   1248 
   1249 			if (!mutex_tryenter(&rp->r_statelock))
   1250 				continue;
   1251 
   1252 			/*
   1253 			 * Expire the attributes by resetting the change
   1254 			 * and attr timeout.
   1255 			 */
   1256 			rp->r_change = 0;
   1257 			PURGE_ATTRCACHE4_LOCKED(rp);
   1258 			mutex_exit(&rp->r_statelock);
   1259 		}
   1260 		rw_exit(&rtable4[index].r_lock);
   1261 	}
   1262 }
   1263 
   1264 /*
   1265  * Flush all vnodes in this (or every) vfs.
   1266  * Used by nfs_sync and by nfs_unmount.
   1267  */
   1268 void
   1269 r4flush(struct vfs *vfsp, cred_t *cr)
   1270 {
   1271 	int index;
   1272 	rnode4_t *rp;
   1273 	vnode_t *vp, **vplist;
   1274 	long num, cnt;
   1275 
   1276 	/*
   1277 	 * Check to see whether there is anything to do.
   1278 	 */
   1279 	num = rnode4_new;
   1280 	if (num == 0)
   1281 		return;
   1282 
   1283 	/*
   1284 	 * Allocate a slot for all currently active rnodes on the
   1285 	 * supposition that they all may need flushing.
   1286 	 */
   1287 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
   1288 	cnt = 0;
   1289 
   1290 	/*
   1291 	 * Walk the hash queues looking for rnodes with page
   1292 	 * lists associated with them.  Make a list of these
   1293 	 * files.
   1294 	 */
   1295 	for (index = 0; index < rtable4size; index++) {
   1296 		rw_enter(&rtable4[index].r_lock, RW_READER);
   1297 		for (rp = rtable4[index].r_hashf;
   1298 		    rp != (rnode4_t *)(&rtable4[index]);
   1299 		    rp = rp->r_hashf) {
   1300 			vp = RTOV4(rp);
   1301 			/*
   1302 			 * Don't bother sync'ing a vp if it
   1303 			 * is part of virtual swap device or
   1304 			 * if VFS is read-only
   1305 			 */
   1306 			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
   1307 				continue;
   1308 			/*
   1309 			 * If flushing all mounted file systems or
   1310 			 * the vnode belongs to this vfs, has pages
   1311 			 * and is marked as either dirty or mmap'd,
   1312 			 * hold and add this vnode to the list of
   1313 			 * vnodes to flush.
   1314 			 */
   1315 			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
   1316 			    nfs4_has_pages(vp) &&
   1317 			    ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
   1318 				VN_HOLD(vp);
   1319 				vplist[cnt++] = vp;
   1320 				if (cnt == num) {
   1321 					rw_exit(&rtable4[index].r_lock);
   1322 					goto toomany;
   1323 				}
   1324 			}
   1325 		}
   1326 		rw_exit(&rtable4[index].r_lock);
   1327 	}
   1328 toomany:
   1329 
   1330 	/*
   1331 	 * Flush and release all of the files on the list.
   1332 	 */
   1333 	while (cnt-- > 0) {
   1334 		vp = vplist[cnt];
   1335 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
   1336 		VN_RELE(vp);
   1337 	}
   1338 
   1339 	/*
   1340 	 * Free the space allocated to hold the list.
   1341 	 */
   1342 	kmem_free(vplist, num * sizeof (*vplist));
   1343 }
   1344 
   1345 int
   1346 nfs4_free_data_reclaim(rnode4_t *rp)
   1347 {
   1348 	char *contents;
   1349 	vnode_t *xattr;
   1350 	int size;
   1351 	vsecattr_t *vsp;
   1352 	int freed;
   1353 	bool_t rdc = FALSE;
   1354 
   1355 	/*
   1356 	 * Free any held caches which may
   1357 	 * be associated with this rnode.
   1358 	 */
   1359 	mutex_enter(&rp->r_statelock);
   1360 	if (rp->r_dir != NULL)
   1361 		rdc = TRUE;
   1362 	contents = rp->r_symlink.contents;
   1363 	size = rp->r_symlink.size;
   1364 	rp->r_symlink.contents = NULL;
   1365 	vsp = rp->r_secattr;
   1366 	rp->r_secattr = NULL;
   1367 	xattr = rp->r_xattr_dir;
   1368 	rp->r_xattr_dir = NULL;
   1369 	mutex_exit(&rp->r_statelock);
   1370 
   1371 	/*
   1372 	 * Free the access cache entries.
   1373 	 */
   1374 	freed = nfs4_access_purge_rp(rp);
   1375 
   1376 	if (rdc == FALSE && contents == NULL && vsp == NULL && xattr == NULL)
   1377 		return (freed);
   1378 
   1379 	/*
   1380 	 * Free the readdir cache entries, incompletely if we can't block.
   1381 	 */
   1382 	nfs4_purge_rddir_cache(RTOV4(rp));
   1383 
   1384 	/*
   1385 	 * Free the symbolic link cache.
   1386 	 */
   1387 	if (contents != NULL) {
   1388 
   1389 		kmem_free((void *)contents, size);
   1390 	}
   1391 
   1392 	/*
   1393 	 * Free any cached ACL.
   1394 	 */
   1395 	if (vsp != NULL)
   1396 		nfs4_acl_free_cache(vsp);
   1397 
   1398 	/*
   1399 	 * Release the xattr directory vnode
   1400 	 */
   1401 	if (xattr != NULL)
   1402 		VN_RELE(xattr);
   1403 
   1404 	return (1);
   1405 }
   1406 
   1407 static int
   1408 nfs4_active_data_reclaim(rnode4_t *rp)
   1409 {
   1410 	char *contents;
   1411 	vnode_t *xattr;
   1412 	int size;
   1413 	vsecattr_t *vsp;
   1414 	int freed;
   1415 	bool_t rdc = FALSE;
   1416 
   1417 	/*
   1418 	 * Free any held credentials and caches which
   1419 	 * may be associated with this rnode.
   1420 	 */
   1421 	if (!mutex_tryenter(&rp->r_statelock))
   1422 		return (0);
   1423 	contents = rp->r_symlink.contents;
   1424 	size = rp->r_symlink.size;
   1425 	rp->r_symlink.contents = NULL;
   1426 	vsp = rp->r_secattr;
   1427 	rp->r_secattr = NULL;
   1428 	if (rp->r_dir != NULL)
   1429 		rdc = TRUE;
   1430 	xattr = rp->r_xattr_dir;
   1431 	rp->r_xattr_dir = NULL;
   1432 	mutex_exit(&rp->r_statelock);
   1433 
   1434 	/*
   1435 	 * Free the access cache entries.
   1436 	 */
   1437 	freed = nfs4_access_purge_rp(rp);
   1438 
   1439 	if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
   1440 		return (freed);
   1441 
   1442 	/*
   1443 	 * Free the symbolic link cache.
   1444 	 */
   1445 	if (contents != NULL) {
   1446 
   1447 		kmem_free((void *)contents, size);
   1448 	}
   1449 
   1450 	/*
   1451 	 * Free any cached ACL.
   1452 	 */
   1453 	if (vsp != NULL)
   1454 		nfs4_acl_free_cache(vsp);
   1455 
   1456 	nfs4_purge_rddir_cache(RTOV4(rp));
   1457 
   1458 	/*
   1459 	 * Release the xattr directory vnode
   1460 	 */
   1461 	if (xattr != NULL)
   1462 		VN_RELE(xattr);
   1463 
   1464 	return (1);
   1465 }
   1466 
   1467 static int
   1468 nfs4_free_reclaim(void)
   1469 {
   1470 	int freed;
   1471 	rnode4_t *rp;
   1472 
   1473 #ifdef DEBUG
   1474 	clstat4_debug.f_reclaim.value.ui64++;
   1475 #endif
   1476 	freed = 0;
   1477 	mutex_enter(&rp4freelist_lock);
   1478 	rp = rp4freelist;
   1479 	if (rp != NULL) {
   1480 		do {
   1481 			if (nfs4_free_data_reclaim(rp))
   1482 				freed = 1;
   1483 		} while ((rp = rp->r_freef) != rp4freelist);
   1484 	}
   1485 	mutex_exit(&rp4freelist_lock);
   1486 	return (freed);
   1487 }
   1488 
   1489 static int
   1490 nfs4_active_reclaim(void)
   1491 {
   1492 	int freed;
   1493 	int index;
   1494 	rnode4_t *rp;
   1495 
   1496 #ifdef DEBUG
   1497 	clstat4_debug.a_reclaim.value.ui64++;
   1498 #endif
   1499 	freed = 0;
   1500 	for (index = 0; index < rtable4size; index++) {
   1501 		rw_enter(&rtable4[index].r_lock, RW_READER);
   1502 		for (rp = rtable4[index].r_hashf;
   1503 		    rp != (rnode4_t *)(&rtable4[index]);
   1504 		    rp = rp->r_hashf) {
   1505 			if (nfs4_active_data_reclaim(rp))
   1506 				freed = 1;
   1507 		}
   1508 		rw_exit(&rtable4[index].r_lock);
   1509 	}
   1510 	return (freed);
   1511 }
   1512 
   1513 static int
   1514 nfs4_rnode_reclaim(void)
   1515 {
   1516 	int freed;
   1517 	rnode4_t *rp;
   1518 	vnode_t *vp;
   1519 
   1520 #ifdef DEBUG
   1521 	clstat4_debug.r_reclaim.value.ui64++;
   1522 #endif
   1523 	freed = 0;
   1524 	mutex_enter(&rp4freelist_lock);
   1525 	while ((rp = rp4freelist) != NULL) {
   1526 		rp4_rmfree(rp);
   1527 		mutex_exit(&rp4freelist_lock);
   1528 		if (rp->r_flags & R4HASHED) {
   1529 			vp = RTOV4(rp);
   1530 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
   1531 			mutex_enter(&vp->v_lock);
   1532 			if (vp->v_count > 1) {
   1533 				vp->v_count--;
   1534 				mutex_exit(&vp->v_lock);
   1535 				rw_exit(&rp->r_hashq->r_lock);
   1536 				mutex_enter(&rp4freelist_lock);
   1537 				continue;
   1538 			}
   1539 			mutex_exit(&vp->v_lock);
   1540 			rp4_rmhash_locked(rp);
   1541 			rw_exit(&rp->r_hashq->r_lock);
   1542 		}
   1543 		/*
   1544 		 * This call to rp_addfree will end up destroying the
   1545 		 * rnode, but in a safe way with the appropriate set
   1546 		 * of checks done.
   1547 		 */
   1548 		rp4_addfree(rp, CRED());
   1549 		mutex_enter(&rp4freelist_lock);
   1550 	}
   1551 	mutex_exit(&rp4freelist_lock);
   1552 	return (freed);
   1553 }
   1554 
   1555 /*ARGSUSED*/
   1556 static void
   1557 nfs4_reclaim(void *cdrarg)
   1558 {
   1559 #ifdef DEBUG
   1560 	clstat4_debug.reclaim.value.ui64++;
   1561 #endif
   1562 	if (nfs4_free_reclaim())
   1563 		return;
   1564 
   1565 	if (nfs4_active_reclaim())
   1566 		return;
   1567 
   1568 	(void) nfs4_rnode_reclaim();
   1569 }
   1570 
   1571 /*
   1572  * Returns the clientid4 to use for the given mntinfo4.  Note that the
   1573  * clientid can change if the caller drops mi_recovlock.
   1574  */
   1575 
   1576 clientid4
   1577 mi2clientid(mntinfo4_t *mi)
   1578 {
   1579 	nfs4_server_t	*sp;
   1580 	clientid4	clientid = 0;
   1581 
   1582 	/* this locks down sp if it is found */
   1583 	sp = find_nfs4_server(mi);
   1584 	if (sp != NULL) {
   1585 		clientid = sp->clientid;
   1586 		mutex_exit(&sp->s_lock);
   1587 		nfs4_server_rele(sp);
   1588 	}
   1589 	return (clientid);
   1590 }
   1591 
   1592 /*
   1593  * Return the current lease time for the server associated with the given
   1594  * file.  Note that the lease time could change immediately after this
   1595  * call.
   1596  */
   1597 
   1598 time_t
   1599 r2lease_time(rnode4_t *rp)
   1600 {
   1601 	nfs4_server_t	*sp;
   1602 	time_t		lease_time;
   1603 	mntinfo4_t	*mi = VTOMI4(RTOV4(rp));
   1604 
   1605 	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
   1606 
   1607 	/* this locks down sp if it is found */
   1608 	sp = find_nfs4_server(VTOMI4(RTOV4(rp)));
   1609 
   1610 	if (VTOMI4(RTOV4(rp))->mi_vfsp->vfs_flag & VFS_UNMOUNTED) {
   1611 		if (sp != NULL) {
   1612 			mutex_exit(&sp->s_lock);
   1613 			nfs4_server_rele(sp);
   1614 		}
   1615 		nfs_rw_exit(&mi->mi_recovlock);
   1616 		return (1);		/* 1 second */
   1617 	}
   1618 
   1619 	ASSERT(sp != NULL);
   1620 
   1621 	lease_time = sp->s_lease_time;
   1622 
   1623 	mutex_exit(&sp->s_lock);
   1624 	nfs4_server_rele(sp);
   1625 	nfs_rw_exit(&mi->mi_recovlock);
   1626 
   1627 	return (lease_time);
   1628 }
   1629 
   1630 /*
   1631  * Return a list with information about all the known open instances for
   1632  * a filesystem. The caller must call r4releopenlist() when done with the
   1633  * list.
   1634  *
   1635  * We are safe at looking at os_valid and os_pending_close across dropping
   1636  * the 'os_sync_lock' to count up the number of open streams and then
   1637  * allocate memory for the osp list due to:
   1638  *	-Looking at os_pending_close is safe since this routine is
   1639  *	only called via recovery, and os_pending_close can only be set via
   1640  *	a non-recovery operation (which are all blocked when recovery
   1641  *	is active).
   1642  *
   1643  *	-Examining os_valid is safe since non-recovery operations, which
   1644  *	could potentially switch os_valid to 0, are blocked (via
   1645  *	nfs4_start_fop) and recovery is single-threaded per mntinfo4_t
   1646  *	(which means we are the only recovery thread potentially acting
   1647  *	on this open stream).
   1648  */
   1649 
   1650 nfs4_opinst_t *
   1651 r4mkopenlist(mntinfo4_t *mi)
   1652 {
   1653 	nfs4_opinst_t *reopenlist, *rep;
   1654 	rnode4_t *rp;
   1655 	vnode_t *vp;
   1656 	vfs_t *vfsp = mi->mi_vfsp;
   1657 	int numosp;
   1658 	nfs4_open_stream_t *osp;
   1659 	int index;
   1660 	open_delegation_type4 dtype;
   1661 	int hold_vnode;
   1662 
   1663 	reopenlist = NULL;
   1664 
   1665 	for (index = 0; index < rtable4size; index++) {
   1666 		rw_enter(&rtable4[index].r_lock, RW_READER);
   1667 		for (rp = rtable4[index].r_hashf;
   1668 		    rp != (rnode4_t *)(&rtable4[index]);
   1669 		    rp = rp->r_hashf) {
   1670 
   1671 			vp = RTOV4(rp);
   1672 			if (vp->v_vfsp != vfsp)
   1673 				continue;
   1674 			hold_vnode = 0;
   1675 
   1676 			mutex_enter(&rp->r_os_lock);
   1677 
   1678 			/* Count the number of valid open_streams of the file */
   1679 			numosp = 0;
   1680 			for (osp = list_head(&rp->r_open_streams); osp != NULL;
   1681 			    osp = list_next(&rp->r_open_streams, osp)) {
   1682 				mutex_enter(&osp->os_sync_lock);
   1683 				if (osp->os_valid && !osp->os_pending_close)
   1684 					numosp++;
   1685 				mutex_exit(&osp->os_sync_lock);
   1686 			}
   1687 
   1688 			/* Fill in the valid open streams per vp */
   1689 			if (numosp > 0) {
   1690 				int j;
   1691 
   1692 				hold_vnode = 1;
   1693 
   1694 				/*
   1695 				 * Add a new open instance to the list
   1696 				 */
   1697 				rep = kmem_zalloc(sizeof (*reopenlist),
   1698 				    KM_SLEEP);
   1699 				rep->re_next = reopenlist;
   1700 				reopenlist = rep;
   1701 
   1702 				rep->re_vp = vp;
   1703 				rep->re_osp = kmem_zalloc(
   1704 				    numosp * sizeof (*(rep->re_osp)),
   1705 				    KM_SLEEP);
   1706 				rep->re_numosp = numosp;
   1707 
   1708 				j = 0;
   1709 				for (osp = list_head(&rp->r_open_streams);
   1710 				    osp != NULL;
   1711 				    osp = list_next(&rp->r_open_streams, osp)) {
   1712 
   1713 					mutex_enter(&osp->os_sync_lock);
   1714 					if (osp->os_valid &&
   1715 					    !osp->os_pending_close) {
   1716 						osp->os_ref_count++;
   1717 						rep->re_osp[j] = osp;
   1718 						j++;
   1719 					}
   1720 					mutex_exit(&osp->os_sync_lock);
   1721 				}
   1722 				/*
   1723 				 * Assuming valid osp(s) stays valid between
   1724 				 * the time obtaining j and numosp.
   1725 				 */
   1726 				ASSERT(j == numosp);
   1727 			}
   1728 
   1729 			mutex_exit(&rp->r_os_lock);
   1730 			/* do this here to keep v_lock > r_os_lock */
   1731 			if (hold_vnode)
   1732 				VN_HOLD(vp);
   1733 			mutex_enter(&rp->r_statev4_lock);
   1734 			if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
   1735 				/*
   1736 				 * If this rnode holds a delegation,
   1737 				 * but if there are no valid open streams,
   1738 				 * then just discard the delegation
   1739 				 * without doing delegreturn.
   1740 				 */
   1741 				if (numosp > 0)
   1742 					rp->r_deleg_needs_recovery =
   1743 					    rp->r_deleg_type;
   1744 			}
   1745 			/* Save the delegation type for use outside the lock */
   1746 			dtype = rp->r_deleg_type;
   1747 			mutex_exit(&rp->r_statev4_lock);
   1748 
   1749 			/*
   1750 			 * If we have a delegation then get rid of it.
   1751 			 * We've set rp->r_deleg_needs_recovery so we have
   1752 			 * enough information to recover.
   1753 			 */
   1754 			if (dtype != OPEN_DELEGATE_NONE) {
   1755 				(void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
   1756 			}
   1757 		}
   1758 		rw_exit(&rtable4[index].r_lock);
   1759 	}
   1760 	return (reopenlist);
   1761 }
   1762 
   1763 /*
   1764  * Release the list of open instance references.
   1765  */
   1766 
   1767 void
   1768 r4releopenlist(nfs4_opinst_t *reopenp)
   1769 {
   1770 	nfs4_opinst_t *rep, *next;
   1771 	int i;
   1772 
   1773 	for (rep = reopenp; rep; rep = next) {
   1774 		next = rep->re_next;
   1775 
   1776 		for (i = 0; i < rep->re_numosp; i++)
   1777 			open_stream_rele(rep->re_osp[i], VTOR4(rep->re_vp));
   1778 
   1779 		VN_RELE(rep->re_vp);
   1780 		kmem_free(rep->re_osp,
   1781 		    rep->re_numosp * sizeof (*(rep->re_osp)));
   1782 
   1783 		kmem_free(rep, sizeof (*rep));
   1784 	}
   1785 }
   1786 
   1787 int
   1788 nfs4_rnode_init(void)
   1789 {
   1790 	ulong_t nrnode4_max;
   1791 	int i;
   1792 
   1793 	/*
   1794 	 * Compute the size of the rnode4 hash table
   1795 	 */
   1796 	if (nrnode <= 0)
   1797 		nrnode = ncsize;
   1798 	nrnode4_max =
   1799 	    (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
   1800 	if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
   1801 		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
   1802 		    "setting nrnode to max value of %ld", nrnode4_max);
   1803 		nrnode = nrnode4_max;
   1804 	}
   1805 	rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
   1806 	rtable4mask = rtable4size - 1;
   1807 
   1808 	/*
   1809 	 * Allocate and initialize the hash buckets
   1810 	 */
   1811 	rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
   1812 	for (i = 0; i < rtable4size; i++) {
   1813 		rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
   1814 		rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
   1815 		rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
   1816 	}
   1817 
   1818 	rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
   1819 	    0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);
   1820 
   1821 	return (0);
   1822 }
   1823 
   1824 int
   1825 nfs4_rnode_fini(void)
   1826 {
   1827 	int i;
   1828 
   1829 	/*
   1830 	 * Deallocate the rnode hash queues
   1831 	 */
   1832 	kmem_cache_destroy(rnode4_cache);
   1833 
   1834 	for (i = 0; i < rtable4size; i++)
   1835 		rw_destroy(&rtable4[i].r_lock);
   1836 
   1837 	kmem_free(rtable4, rtable4size * sizeof (*rtable4));
   1838 
   1839 	return (0);
   1840 }
   1841 
   1842 /*
   1843  * Return non-zero if the given filehandle refers to the root filehandle
   1844  * for the given rnode.
   1845  */
   1846 
   1847 static int
   1848 isrootfh(nfs4_sharedfh_t *fh, rnode4_t *rp)
   1849 {
   1850 	int isroot;
   1851 
   1852 	isroot = 0;
   1853 	if (SFH4_SAME(VTOMI4(RTOV4(rp))->mi_rootfh, fh))
   1854 		isroot = 1;
   1855 
   1856 	return (isroot);
   1857 }
   1858 
   1859 /*
   1860  * The r4_stub_* routines assume that the rnode is newly activated, and
   1861  * that the caller either holds the hash bucket r_lock for this rnode as
   1862  * RW_WRITER, or holds r_statelock.
   1863  */
   1864 static void
   1865 r4_stub_set(rnode4_t *rp, nfs4_stub_type_t type)
   1866 {
   1867 	vnode_t *vp = RTOV4(rp);
   1868 	krwlock_t *hash_lock = &rp->r_hashq->r_lock;
   1869 
   1870 	ASSERT(RW_WRITE_HELD(hash_lock) || MUTEX_HELD(&rp->r_statelock));
   1871 
   1872 	rp->r_stub_type = type;
   1873 
   1874 	/*
   1875 	 * Safely switch this vnode to the trigger vnodeops.
   1876 	 *
   1877 	 * Currently, we don't ever switch a trigger vnode back to using
   1878 	 * "regular" v4 vnodeops. NFS4_STUB_NONE is only used to note that
   1879 	 * a new v4 object is not a trigger, and it will already have the
   1880 	 * correct v4 vnodeops by default. So, no "else" case required here.
   1881 	 */
   1882 	if (type != NFS4_STUB_NONE)
   1883 		vn_setops(vp, nfs4_trigger_vnodeops);
   1884 }
   1885 
/*
 * Mark the rnode as a mirror-mount stub: calls r4_stub_set() with
 * NFS4_STUB_MIRRORMOUNT.  The locking requirements of r4_stub_set()
 * apply.
 */
void
r4_stub_mirrormount(rnode4_t *rp)
{
	r4_stub_set(rp, NFS4_STUB_MIRRORMOUNT);
}
   1891 
/*
 * Mark the rnode as not being a stub: calls r4_stub_set() with
 * NFS4_STUB_NONE.  The locking requirements of r4_stub_set() apply.
 */
void
r4_stub_none(rnode4_t *rp)
{
	r4_stub_set(rp, NFS4_STUB_NONE);
}
   1897 
   1898 #ifdef DEBUG
   1899 
   1900 /*
   1901  * Look in the rnode table for other rnodes that have the same filehandle.
   1902  * Assume the lock is held for the hash chain of checkrp
   1903  */
   1904 
   1905 static void
   1906 r4_dup_check(rnode4_t *checkrp, vfs_t *vfsp)
   1907 {
   1908 	rnode4_t *rp;
   1909 	vnode_t *tvp;
   1910 	nfs4_fhandle_t fh, fh2;
   1911 	int index;
   1912 
   1913 	if (!r4_check_for_dups)
   1914 		return;
   1915 
   1916 	ASSERT(RW_LOCK_HELD(&checkrp->r_hashq->r_lock));
   1917 
   1918 	sfh4_copyval(checkrp->r_fh, &fh);
   1919 
   1920 	for (index = 0; index < rtable4size; index++) {
   1921 
   1922 		if (&rtable4[index] != checkrp->r_hashq)
   1923 			rw_enter(&rtable4[index].r_lock, RW_READER);
   1924 
   1925 		for (rp = rtable4[index].r_hashf;
   1926 		    rp != (rnode4_t *)(&rtable4[index]);
   1927 		    rp = rp->r_hashf) {
   1928 
   1929 			if (rp == checkrp)
   1930 				continue;
   1931 
   1932 			tvp = RTOV4(rp);
   1933 			if (tvp->v_vfsp != vfsp)
   1934 				continue;
   1935 
   1936 			sfh4_copyval(rp->r_fh, &fh2);
   1937 			if (nfs4cmpfhandle(&fh, &fh2) == 0) {
   1938 				cmn_err(CE_PANIC, "rnodes with same fs, fh "
   1939 				    "(%p, %p)", (void *)checkrp, (void *)rp);
   1940 			}
   1941 		}
   1942 
   1943 		if (&rtable4[index] != checkrp->r_hashq)
   1944 			rw_exit(&rtable4[index].r_lock);
   1945 	}
   1946 }
   1947 
   1948 #endif /* DEBUG */
   1949