Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/systm.h>
     27 #include <sys/systeminfo.h>
     28 #include <sys/vfs.h>
     29 #include <sys/vfs_opreg.h>
     30 #include <sys/vnode.h>
     31 #include <sys/kmem.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/atomic.h>
     34 #include <sys/clconf.h>
     35 #include <sys/cladm.h>
     36 #include <sys/flock.h>
     37 #include <nfs/export.h>
     38 #include <nfs/nfs.h>
     39 #include <nfs/nfs4.h>
     40 #include <nfs/nfssys.h>
     41 #include <nfs/lm.h>
     42 #include <sys/pathname.h>
     43 #include <sys/sdt.h>
     44 #include <sys/nvpair.h>
     45 #include <sys/sdt.h>
     46 #include <sys/disp.h>
     47 #include <sys/id_space.h>
     48 
     49 #include <nfs/nfs_sstor_impl.h>
     50 #include <nfs/mds_state.h>
     51 
     52 #include <nfs/spe_impl.h>
     53 
/*
 * Declared here and defined elsewhere; not referenced in the visible part
 * of this file.
 */
extern int nfs_doorfd;


/*
 * Stateid whose first member is zero and whose twelve trailing bytes are
 * all zero.
 */
stateid4 special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

/*
 * Stateid whose first member is 0xffffffff and whose twelve trailing bytes
 * are all 0xff.
 */
stateid4 special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};


/*
 * Non-zero when stateid4_cmp() returns non-zero for `id' against either of
 * the two stateids above (presumably meaning "equal" -- confirm against
 * the definition of stateid4_cmp()).
 */
#define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
			stateid4_cmp(id, &special1))

/* For embedding the cluster nodeid into our clientid */
#define	CLUSTER_NODEID_SHIFT	24
#define	CLUSTER_MAX_NODEID	255

#ifdef DEBUG
/* Exists only in DEBUG builds; not used in the visible part of this file. */
int rfs4_debug;
#endif
     82 
/*
 * Operation table for read delegations: each entry pairs a vnode operation
 * name with the deleg_rd_* handler for it.  A NULL/NULL entry ends the
 * table.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * Operation table for write delegations.  Same layout as the read table,
 * with the deleg_wr_* handlers and an additional VOPNAME_READ entry.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};

/*
 * Forward declarations for the stable-storage helpers that are defined
 * further down in this file.
 */
static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, nfs_server_instance_t *sip);
static void rfs4_ss_write(nfs_server_instance_t *, rfs4_client_t *, char *);
static void rfs4_ss_delete_client(nfs_server_instance_t *, char *);
static void rfs4_ss_delete_oldstate(nfs_server_instance_t *);
static void rfs4_clean_reclaim_list(nfs_server_instance_t *);
void rfs4_ss_retrieve_state(nfs_server_instance_t *);
    111 
    112 /*
    113  * Module load initialization
    114  */
    115 int
    116 rfs4_srvrinit(void)
    117 {
    118 	extern void nsi_cache_init();
    119 	extern void mds_srvrinit();
    120 	extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
    121 	extern void rfs4_ntov_init(void);
    122 
    123 	rw_init(&nsi_lock, NULL, RW_DEFAULT, NULL);
    124 
    125 	list_create(&nsi_head, sizeof (nfs_server_instance_t),
    126 	    offsetof(nfs_server_instance_t, nsi_list));
    127 
    128 	/* create the nfs_server_instance keme cache */
    129 	nsi_cache_init();
    130 
    131 	rfs4_client_clrst = rfs4_clear_client_state;
    132 
    133 	rfs4_ntov_init();
    134 
    135 	mds_srvrinit();
    136 
    137 	return (0);
    138 }
    139 
    140 /*
    141  * Couple of simple init/destroy functions for a general waiter
    142  */
    143 void
    144 rfs4_sw_init(rfs4_state_wait_t *swp)
    145 {
    146 	mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
    147 	cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
    148 	swp->sw_active = FALSE;
    149 	swp->sw_wait_count = 0;
    150 }
    151 
    152 void
    153 rfs4_sw_destroy(rfs4_state_wait_t *swp)
    154 {
    155 	mutex_destroy(swp->sw_cv_lock);
    156 	cv_destroy(swp->sw_cv);
    157 }
    158 
    159 void
    160 rfs4_sw_enter(rfs4_state_wait_t *swp)
    161 {
    162 	mutex_enter(swp->sw_cv_lock);
    163 	while (swp->sw_active) {
    164 		swp->sw_wait_count++;
    165 		cv_wait(swp->sw_cv, swp->sw_cv_lock);
    166 		swp->sw_wait_count--;
    167 	}
    168 	ASSERT(swp->sw_active == FALSE);
    169 	swp->sw_active = TRUE;
    170 	mutex_exit(swp->sw_cv_lock);
    171 }
    172 
    173 void
    174 rfs4_sw_exit(rfs4_state_wait_t *swp)
    175 {
    176 	mutex_enter(swp->sw_cv_lock);
    177 	ASSERT(swp->sw_active == TRUE);
    178 	swp->sw_active = FALSE;
    179 	if (swp->sw_wait_count != 0)
    180 		cv_broadcast(swp->sw_cv);
    181 	mutex_exit(swp->sw_cv_lock);
    182 }
    183 
    184 static void
    185 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
    186 {
    187 	lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
    188 	lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
    189 
    190 	if (sres->status == NFS4ERR_DENIED) {
    191 		dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
    192 		bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
    193 	}
    194 }
    195 
    196 static void
    197 deep_lock_free(LOCK4res *res)
    198 {
    199 	lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
    200 
    201 	if (res->status == NFS4ERR_DENIED)
    202 		kmem_free(lo->owner_val, lo->owner_len);
    203 }
    204 
    205 static void
    206 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
    207 {
    208 	nfsace4 *sacep, *dacep;
    209 
    210 	if (sres->status != NFS4_OK) {
    211 		return;
    212 	}
    213 
    214 	dres->attrset = sres->attrset;
    215 
    216 	switch (sres->delegation.delegation_type) {
    217 	case OPEN_DELEGATE_NONE:
    218 		return;
    219 	case OPEN_DELEGATE_READ:
    220 		sacep = &sres->delegation.open_delegation4_u.read.permissions;
    221 		dacep = &dres->delegation.open_delegation4_u.read.permissions;
    222 		break;
    223 	case OPEN_DELEGATE_WRITE:
    224 		sacep = &sres->delegation.open_delegation4_u.write.permissions;
    225 		dacep = &dres->delegation.open_delegation4_u.write.permissions;
    226 		break;
    227 	}
    228 	dacep->who.utf8string_val =
    229 	    kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
    230 	bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
    231 	    sacep->who.utf8string_len);
    232 }
    233 
    234 static void
    235 deep_open_free(OPEN4res *res)
    236 {
    237 	nfsace4 *acep;
    238 	if (res->status != NFS4_OK)
    239 		return;
    240 
    241 	switch (res->delegation.delegation_type) {
    242 	case OPEN_DELEGATE_NONE:
    243 		return;
    244 	case OPEN_DELEGATE_READ:
    245 		acep = &res->delegation.open_delegation4_u.read.permissions;
    246 		break;
    247 	case OPEN_DELEGATE_WRITE:
    248 		acep = &res->delegation.open_delegation4_u.write.permissions;
    249 		break;
    250 	}
    251 
    252 	if (acep->who.utf8string_val) {
    253 		kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
    254 		acep->who.utf8string_val = NULL;
    255 	}
    256 }
    257 
    258 void
    259 rfs4_free_reply(nfs_resop4 *rp)
    260 {
    261 	switch (rp->resop) {
    262 	case OP_LOCK:
    263 		deep_lock_free(&rp->nfs_resop4_u.oplock);
    264 		break;
    265 	case OP_OPEN:
    266 		deep_open_free(&rp->nfs_resop4_u.opopen);
    267 	default:
    268 		break;
    269 	}
    270 }
    271 
    272 void
    273 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
    274 {
    275 	*dst = *src;
    276 
    277 	/* Handle responses that need deep copy */
    278 	switch (src->resop) {
    279 	case OP_LOCK:
    280 		deep_lock_copy(&dst->nfs_resop4_u.oplock,
    281 		    &src->nfs_resop4_u.oplock);
    282 		break;
    283 	case OP_OPEN:
    284 		deep_open_copy(&dst->nfs_resop4_u.opopen,
    285 		    &src->nfs_resop4_u.opopen);
    286 		break;
    287 	default:
    288 		break;
    289 	};
    290 }
    291 
    292 /*
    293  * This is the implementation of the underlying state engine. The
    294  * public interface to this engine is described by
    295  * nfs4_state.h. Callers to the engine should hold no state engine
    296  * locks when they call in to it. If the protocol needs to lock data
    297  * structures it should do so after acquiring all references to them
    298  * first and then follow the following lock order:
    299  *
    300  *	client > openowner > state > lo_state > lockowner > file.
    301  *
    302  * Internally we only allow a thread to hold one hash bucket lock at a
    303  * time and the lock is higher in the lock order (must be acquired
    304  * first) than the data structure that is on that hash list.
    305  *
    306  * If a new reference was acquired by the caller, that reference needs
    307  * to be released after releasing all acquired locks with the
    308  * corresponding rfs4_*_rele routine.
    309  */
    310 
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. It will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much.  The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader/writer
 * locks: when a creation has to be done, the mutex is held
 * across a kmem_alloc with KM_SLEEP specified.
 */
    322 
    323 
    324 
    325 void
    326 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
    327 {
    328 	kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
    329 }
    330 
    331 static rfs4_ss_pn_t *
    332 rfs4_ss_pnalloc(char *dir, char *leaf)
    333 {
    334 	rfs4_ss_pn_t *ss_pn;
    335 	int 	dir_len, leaf_len;
    336 
    337 	/*
    338 	 * validate we have a resonable path
    339 	 * (account for the '/' and trailing null)
    340 	 */
    341 	if ((dir_len = strlen(dir)) > MAXPATHLEN ||
    342 	    (leaf_len = strlen(leaf)) > MAXNAMELEN ||
    343 	    (dir_len + leaf_len + 2) > MAXPATHLEN) {
    344 		return (NULL);
    345 	}
    346 
    347 	ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
    348 
    349 	(void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
    350 	/* Handy pointer to just the leaf name */
    351 	ss_pn->leaf = ss_pn->pn + dir_len + 1;
    352 	return (ss_pn);
    353 }
    354 
    355 
    356 static void
    357 rfs4_ss_fini(nfs_server_instance_t *instp)
    358 {
    359 	rfs4_clean_reclaim_list(instp);
    360 }
    361 
    362 void
    363 rfs4_ss_build_reclaim_list(nfs_server_instance_t *instp, char *resbuf)
    364 {
    365 	rfs4_reclaim_t *oldp;
    366 	struct ss_res *resp = (struct ss_res *)resbuf;
    367 	struct ss_rd_state *clp;
    368 	int c, len;
    369 
    370 	clp = resp->rec;
    371 	for (c = resp->nsize; c > 0; c--) {
    372 		oldp = kmem_alloc(sizeof (rfs4_reclaim_t), KM_SLEEP);
    373 		oldp->ss_pn = NULL;
    374 		len = (int)clp->ssr_len;
    375 		oldp->cl_id4.id_val = kmem_alloc(len, KM_SLEEP);
    376 		oldp->cl_id4.verifier = clp->ssr_veri;
    377 		oldp->cl_id4.id_len = len;
    378 		bcopy(clp->ssr_val, oldp->cl_id4.id_val, len);
    379 		list_insert_head(&instp->reclaim_head, oldp);
    380 		len += (sizeof (uint64_t) + sizeof (uint64_t));
    381 		len = P2ROUNDUP(len, 8);
    382 		clp = (struct ss_rd_state *)((char *)clp + len);
    383 	}
    384 	instp->reclaim_cnt = resp->nsize;
    385 }
    386 
    387 int
    388 rfs4_ss_read_state(nfs_server_instance_t *instp, char **buf, int *sz)
    389 {
    390 	struct ss_arg ss_data;
    391 	struct ss_res *ss_res;
    392 	door_arg_t dargs;
    393 	int err;
    394 
    395 	ss_data.cmd = NFS4_SS_READ;
    396 	ss_data.rsz = *sz;	/* size of return buffer */
    397 	(void) snprintf(ss_data.path, MAXPATHLEN, "%s", instp->inst_name);
    398 
    399 	dargs.data_ptr = (char *)&ss_data;
    400 	dargs.data_size = sizeof (struct ss_arg);
    401 	dargs.desc_ptr = NULL;
    402 	dargs.desc_num = 0;
    403 	dargs.rbuf = *buf;
    404 	dargs.rsize = *sz;
    405 
    406 	err = door_ki_upcall(instp->dh, &dargs);
    407 	if (err) {
    408 /*
    409  * XXX - When this happens, we are screwed.  nfsd has gone away and there
    410  * is nothing we can do about it here.  We probably need to just shutdown
    411  * the NFS server until nfsd is fixed.
    412  */
    413 		printf("CRAP!  The door upcall failed\n");
    414 		return (err);
    415 	}
    416 
    417 	ss_res = (struct ss_res *)dargs.rbuf;
    418 
    419 	if (ss_res->status != NFS_DR_SUCCESS) {
    420 		/* special handling for buffer too small */
    421 		if (ss_res->status == NFS_DR_OVERFLOW) {
    422 			*sz = ss_res->nsize;
    423 			return (-1);
    424 		}
    425 		return (ss_res->status);
    426 	}
    427 
    428 	/* if buf too small, but door provided buf */
    429 	if (dargs.rbuf != *buf) {
    430 		kmem_free(*buf, *sz);
    431 		*sz = dargs.rsize;
    432 		*buf = dargs.rbuf;
    433 	}
    434 	return (0);
    435 }
    436 
    437 /*
    438  * retrieve the oldstate from stable storage.
    439  */
    440 void
    441 rfs4_ss_retrieve_state(nfs_server_instance_t *instp)
    442 {
    443 	int ret, notdone;
    444 	int sz, osz;
    445 	char *resbuf;
    446 
    447 	osz = sz = 512 * 1024;
    448 	do {
    449 		notdone = 0;
    450 		resbuf = kmem_alloc(sz, KM_SLEEP);
    451 
    452 		ret = rfs4_ss_read_state(instp, &resbuf, &sz);
    453 		if (ret == -1) {
    454 			kmem_free(resbuf, osz);
    455 			osz = sz;
    456 			notdone = 1;
    457 		}
    458 	} while (notdone);
    459 
    460 	if (ret == 0)
    461 		rfs4_ss_build_reclaim_list(instp, resbuf);
    462 
    463 	kmem_free(resbuf, sz);
    464 
    465 	/* for now assume it's all good!  */
    466 	instp->inst_flags |= NFS_INST_SS_ENABLED;
    467 }
    468 
    469 /*
    470  * Check if we are still in grace and if the client can be
    471  * granted permission to perform reclaims.
    472  *
    473  * XXX Only called from  setclientid_confirm, if MDS need
    474  * XXX this then we need alterations!
    475  */
    476 void
    477 rfs4_ss_chkclid(struct compound_state *cs, rfs4_client_t *cp)
    478 {
    479 	/*
    480 	 * It should be sufficient to check the oldstate data for just
    481 	 * this client's instance. However, since our per-instance
    482 	 * client grouping is solely temporal, HA-NFSv4 RG failover
    483 	 * might result in clients of the same RG being partitioned into
    484 	 * separate instances.
    485 	 *
    486 	 * Until the client grouping is improved, we must check the
    487 	 * oldstate data for all instances with an active grace period.
    488 	 *
    489 	 * This also serves as the mechanism to remove stale oldstate data.
    490 	 * The first time we check an instance after its grace period has
    491 	 * expired, the oldstate data should be cleared.
    492 	 *
    493 	 * Start at the current instance, and walk the list backwards
    494 	 * to the first.
    495 	 */
    496 	rfs4_ss_chkclid_sip(cp, cs->instp);
    497 }
    498 
    499 static void
    500 rfs4_ss_chkclid_sip(rfs4_client_t *cp, nfs_server_instance_t *sip)
    501 {
    502 	rfs4_reclaim_t *osp, *os_head;
    503 
    504 	/* short circuit everything if this server instance has no oldstate */
    505 	rw_enter(&sip->reclaimlst_lock, RW_READER);
    506 	os_head = list_head(&sip->reclaim_head);
    507 	rw_exit(&sip->reclaimlst_lock);
    508 	if (os_head == NULL)
    509 		return;
    510 
    511 	/*
    512 	 * If this server instance is no longer in a grace period then
    513 	 * the client won't be able to reclaim. No further need for this
    514 	 * instance's oldstate data, so it can be cleared.
    515 	 */
    516 	if (!rfs4_in_grace(sip)) {
    517 		rfs4_ss_delete_oldstate(sip);
    518 		return;
    519 	}
    520 
    521 	/* this instance is still in grace; search for the clientid */
    522 
    523 	rw_enter(&sip->reclaimlst_lock, RW_READER);
    524 
    525 	osp = list_head(&sip->reclaim_head);
    526 	while (osp) {
    527 		if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
    528 			if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
    529 			    osp->cl_id4.id_len) == 0) {
    530 				cp->rc_can_reclaim = 1;
    531 				break;
    532 			}
    533 		}
    534 		osp = list_next(&sip->reclaim_head, osp);
    535 	}
    536 
    537 	rw_exit(&sip->reclaimlst_lock);
    538 }
    539 
    540 static void
    541 rfs4_ss_write(nfs_server_instance_t *instp, rfs4_client_t *cp, char *leaf)
    542 {
    543 	struct ss_arg *ss_datap;
    544 	struct ss_res res_buf;
    545 	struct ss_res *resp;
    546 	nfs_client_id4 *clp = &(cp->rc_nfs_client);
    547 	door_arg_t dargs;
    548 	rfs4_ss_pn_t *ss_pn;
    549 	int size, error;
    550 
    551 	size = sizeof (struct ss_arg) + clp->id_len;
    552 	ss_datap = kmem_alloc(size, KM_SLEEP);
    553 
    554 	ss_pn = rfs4_ss_pnalloc(instp->inst_name, leaf);
    555 	if (ss_pn == NULL) {
    556 		kmem_free(ss_datap, size);
    557 		return;
    558 	}
    559 	(void) snprintf(ss_datap->path, MAXPATHLEN, "%s/%s",
    560 	    instp->inst_name, leaf);
    561 
    562 	ss_datap->cmd = NFS4_SS_WRITE;
    563 	ss_datap->rec.ss_fvers = NFS4_SS_VERSION;
    564 	ss_datap->rec.ss_veri = clp->verifier;
    565 	ss_datap->rec.ss_len = clp->id_len;
    566 	bcopy(clp->id_val, ss_datap->rec.ss_val, clp->id_len);
    567 
    568 	dargs.data_ptr = (char *)ss_datap;
    569 	dargs.data_size = size;
    570 	dargs.desc_ptr = NULL;
    571 	dargs.desc_num = 0;
    572 	dargs.rbuf = (char *)&res_buf;
    573 	dargs.rsize = sizeof (struct ss_res);
    574 
    575 	error = door_ki_upcall(instp->dh, &dargs);
    576 
    577 	kmem_free(ss_datap, size);
    578 
    579 	if (error) {
    580 		rfs4_ss_pnfree(ss_pn);
    581 		return;
    582 	}
    583 	resp = (struct ss_res *)dargs.rbuf;
    584 	if (resp->status != 0) {
    585 		rfs4_ss_pnfree(ss_pn);
    586 		goto out;
    587 	}
    588 
    589 	if (cp->rc_ss_pn == NULL) {
    590 		cp->rc_ss_pn = ss_pn;
    591 	} else {
    592 		if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
    593 			/* we've already recorded *this* leaf */
    594 			rfs4_ss_pnfree(ss_pn);
    595 		} else {
    596 			/* replace with this leaf */
    597 			rfs4_ss_pnfree(cp->rc_ss_pn);
    598 			cp->rc_ss_pn = ss_pn;
    599 		}
    600 	}
    601 
    602 out:
    603 	/* this should never happen */
    604 	if (resp != &res_buf) {
    605 		kmem_free(resp, dargs.rsize);
    606 	}
    607 }
    608 
    609 /*
    610  * Place client information into stable storage.
    611  * First, generate the leaf filename, from the client's IP address and
    612  * the server-generated short-hand clientid.
    613  */
    614 void
    615 rfs4_ss_clid(struct compound_state *cs, rfs4_client_t *cp, struct svc_req *req)
    616 {
    617 	const char *kinet_ntop6(uchar_t *, char *, size_t);
    618 	char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
    619 	struct sockaddr *ca;
    620 	uchar_t *b;
    621 
    622 	if (!(cs->instp->inst_flags & NFS_INST_SS_ENABLED)) {
    623 		return;
    624 	}
    625 
    626 	buf[0] = 0;
    627 
    628 
    629 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
    630 	if (ca == NULL) {
    631 		return;
    632 	}
    633 
    634 	/*
    635 	 * Convert the caller's IP address to a dotted string
    636 	 */
    637 	if (ca->sa_family == AF_INET) {
    638 
    639 		bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->rc_addr,
    640 		    sizeof (struct sockaddr_in));
    641 		b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
    642 		(void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
    643 		    b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
    644 	} else if (ca->sa_family == AF_INET6) {
    645 		struct sockaddr_in6 *sin6;
    646 
    647 		sin6 = (struct sockaddr_in6 *)ca;
    648 		bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->rc_addr,
    649 		    sizeof (struct sockaddr_in6));
    650 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
    651 		    buf, INET6_ADDRSTRLEN);
    652 	}
    653 
    654 	(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
    655 	    (longlong_t)cp->rc_clientid);
    656 
    657 	rfs4_ss_write(cs->instp, cp, leaf);
    658 }
    659 
    660 
    661 
    662 /*
    663  * DSS: distributed stable storage.
    664  * Unpack the list of paths passed by nfsd.
    665  * Use nvlist_alloc(9F) to manage the data.
    666  * The caller is responsible for allocating and freeing the buffer.
    667  */
    668 int
    669 rfs4_dss_setpaths(char *buf, size_t buflen)
    670 {
    671 	int error;
    672 
    673 	/*
    674 	 * If this is a "warm start", i.e. we previously had DSS paths,
    675 	 * preserve the old paths.
    676 	 */
    677 	if (rfs4_dss_paths != NULL) {
    678 		/*
    679 		 * Before we lose the ptr, destroy the nvlist and pathnames
    680 		 * array from the warm start before this one.
    681 		 */
    682 		if (rfs4_dss_oldpaths)
    683 			nvlist_free(rfs4_dss_oldpaths);
    684 		rfs4_dss_oldpaths = rfs4_dss_paths;
    685 	}
    686 
    687 	/* unpack the buffer into a searchable nvlist */
    688 	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
    689 	if (error)
    690 		return (error);
    691 
    692 	/*
    693 	 * Search the nvlist for the pathnames nvpair (which is the only nvpair
    694 	 * in the list, and record its location.
    695 	 */
    696 	error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
    697 	    &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
    698 	return (error);
    699 }
    700 
    701 /*
    702  * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
    703  * to find and call the protocol specific clean_up/expire
    704  * function;
    705  */
    706 static void
    707 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
    708 {
    709 	rfs4_client_t *cp = (rfs4_client_t *)ent;
    710 	struct nfs4clrst_args *clr = arg;
    711 	struct sockaddr_in6 *ent_sin6;
    712 	struct in6_addr  clr_in6;
    713 	struct sockaddr_in  *ent_sin;
    714 	struct in_addr   clr_in;
    715 	nfs_server_instance_t *instp;
    716 
    717 	if (clr->addr_type != cp->rc_addr.ss_family) {
    718 		return;
    719 	}
    720 
    721 	instp = dbe_to_instp(cp->rc_dbe);
    722 
    723 	switch (clr->addr_type) {
    724 
    725 	case AF_INET6:
    726 		/* copyin the address from user space */
    727 		if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
    728 			break;
    729 		}
    730 
    731 		ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
    732 
    733 		/*
    734 		 * now compare, and if equivalent mark entry
    735 		 * for forced expiration
    736 		 */
    737 		if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
    738 			(*instp->clnt_clear)(cp);
    739 		}
    740 		break;
    741 
    742 	case AF_INET:
    743 		/* copyin the address from user space */
    744 		if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
    745 			break;
    746 		}
    747 
    748 		ent_sin = (struct sockaddr_in *)&cp->rc_addr;
    749 
    750 		/*
    751 		 * now compare, and if equivalent mark entry
    752 		 * for forced expiration
    753 		 */
    754 		if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
    755 			(*instp->clnt_clear)(cp);
    756 		}
    757 		break;
    758 
    759 	default:
    760 		/* force this assert to fail */
    761 		ASSERT(clr->addr_type != clr->addr_type);
    762 	}
    763 }
    764 
    765 static void
    766 sstor_client_scrub(nfs_server_instance_t *instp, void *data)
    767 {
    768 	struct nfs4clrst_args *arg = (struct nfs4clrst_args *)data;
    769 
    770 	if (instp->client_tab != NULL)
    771 		rfs4_dbe_walk(instp->client_tab, rfs4_client_scrub, arg);
    772 }
    773 
    774 /*
    775  * This is called from nfssys() in order to clear server state
    776  * for the specified client IP Address.
    777  */
    778 void
    779 rfs4_clear_client_state(struct nfs4clrst_args *clr)
    780 {
    781 	nsi_walk(sstor_client_scrub, clr);
    782 }
    783 
/*
 * TODO: this needs to be cleaned up.
 *
 * Overlay giving access to a clientid4 as two 32-bit fields, start_time
 * and c_id.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;

/* File-local helpers; their definitions are not in this part of the file. */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);

/*
 * Overlay giving access to a verifier4 as two 32-bit fields, c_id and
 * gen_num.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4	confirm_verf;
} scid_confirm_verf;
    804 
    805 uint32_t
    806 clientid_hash(void *key)
    807 {
    808 	cid *idp = key;
    809 
    810 	return (idp->impl_id.c_id);
    811 }
    812 
    813 bool_t
    814 clientid_compare(rfs4_entry_t entry, void *key)
    815 {
    816 	rfs4_client_t *cp = (rfs4_client_t *)entry;
    817 	clientid4 *idp = key;
    818 
    819 	return (*idp == cp->rc_clientid);
    820 }
    821 
    822 void *
    823 clientid_mkkey(rfs4_entry_t entry)
    824 {
    825 	rfs4_client_t *cp = (rfs4_client_t *)entry;
    826 
    827 	return (&cp->rc_clientid);
    828 }
    829 
    830 uint32_t
    831 nfsclnt_hash(void *key)
    832 {
    833 	nfs_client_id4 *client = key;
    834 	int i;
    835 	uint32_t hash = 0;
    836 
    837 	for (i = 0; i < client->id_len; i++) {
    838 		hash <<= 1;
    839 		hash += (uint_t)client->id_val[i];
    840 	}
    841 	return (hash);
    842 }
    843 
    844 
    845 bool_t
    846 nfsclnt_compare(rfs4_entry_t entry, void *key)
    847 {
    848 	rfs4_client_t *cp = (rfs4_client_t *)entry;
    849 	nfs_client_id4 *nfs_client = key;
    850 
    851 	if (cp->rc_nfs_client.id_len != nfs_client->id_len)
    852 		return (FALSE);
    853 
    854 	return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
    855 	    nfs_client->id_len) == 0);
    856 }
    857 
    858 void *
    859 nfsclnt_mkkey(rfs4_entry_t entry)
    860 {
    861 	rfs4_client_t *cp = (rfs4_client_t *)entry;
    862 
    863 	return (&cp->rc_nfs_client);
    864 }
    865 
    866 bool_t
    867 rfs4_client_expiry(rfs4_entry_t u_entry)
    868 {
    869 	nfs_server_instance_t *instp;
    870 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
    871 	bool_t cp_expired;
    872 
    873 	if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
    874 		cp->rc_ss_remove = 1;
    875 		return (TRUE);
    876 	}
    877 
    878 	if (cp->rc_clid_scope)
    879 		return (FALSE);
    880 
    881 	instp = dbe_to_instp(cp->rc_dbe);
    882 	/*
    883 	 * If the sysadmin has used clear_locks for this
    884 	 * entry then forced_expire will be set and we
    885 	 * want this entry to be reaped. Or the entry
    886 	 * has exceeded its lease period.
    887 	 */
    888 	cp_expired = (cp->rc_forced_expire ||
    889 	    (gethrestime_sec() - cp->rc_last_access
    890 	    > instp->lease_period));
    891 
    892 	if (!cp->rc_ss_remove && cp_expired)
    893 		cp->rc_ss_remove = 1;
    894 	return (cp_expired);
    895 }
    896 
    897 static void
    898 rfs4_ss_delete_client(nfs_server_instance_t *instp, char *leaf)
    899 {
    900 	struct ss_arg ss_data;
    901 	struct ss_res res_buf;
    902 	door_arg_t dargs;
    903 	int error;
    904 
    905 	ss_data.cmd = NFS4_SS_DELETE_CLNT;
    906 	(void) snprintf(ss_data.path, MAXPATHLEN, "%s/%s",
    907 	    instp->inst_name, leaf);
    908 
    909 	dargs.data_ptr = (char *)&ss_data;
    910 	dargs.data_size = sizeof (struct ss_arg);
    911 	dargs.desc_ptr = NULL;
    912 	dargs.desc_num = 0;
    913 	dargs.rbuf = (char *)&res_buf;
    914 	dargs.rsize = sizeof (struct ss_res);
    915 
    916 	error = door_ki_upcall(instp->dh, &dargs);
    917 
    918 #ifdef DEBUG
    919 	/* XXX - jw - what do we do here? */
    920 	if (error)
    921 		printf("ss_delete_client: door upcall failed! (%d)\n", error);
    922 #endif
    923 }
    924 
    925 static void
    926 rfs4_ss_delete_oldstate(nfs_server_instance_t *instp)
    927 {
    928 	struct ss_arg ss_data;
    929 	struct ss_res res_buf;
    930 	door_arg_t dargs;
    931 	int error;
    932 
    933 	ss_data.cmd = NFS4_SS_DELETE_OLD;
    934 	(void) snprintf(ss_data.path, MAXPATHLEN, "%s", instp->inst_name);
    935 
    936 	dargs.data_ptr = (char *)&ss_data;
    937 	dargs.data_size = sizeof (struct ss_arg);
    938 	dargs.desc_ptr = NULL;
    939 	dargs.desc_num = 0;
    940 	dargs.rbuf = (char *)&res_buf;
    941 	dargs.rsize = sizeof (struct ss_res);
    942 
    943 	error = door_ki_upcall(instp->dh, &dargs);
    944 
    945 #ifdef DEBUG
    946 	/* XXX - jw - what do we do here? */
    947 	if (error)
    948 		printf("delete_oldstate: door upcall failed! (%d)\n", error);
    949 #endif
    950 
    951 	rfs4_clean_reclaim_list(instp);
    952 }
    953 
    954 static void
    955 rfs4_clean_reclaim_list(nfs_server_instance_t *instp)
    956 {
    957 	rfs4_reclaim_t *op;
    958 
    959 	rw_enter(&instp->reclaimlst_lock, RW_WRITER);
    960 
    961 	while (op = list_head(&instp->reclaim_head)) {
    962 		list_remove(&instp->reclaim_head, op);
    963 		if (op->cl_id4.id_val)
    964 			kmem_free(op->cl_id4.id_val, op->cl_id4.id_len);
    965 		if (op->ss_pn)
    966 			kmem_free(op->ss_pn, sizeof (rfs4_ss_pn_t));
    967 		kmem_free(op, sizeof (rfs4_reclaim_t));
    968 	}
    969 
    970 	rw_exit(&instp->reclaimlst_lock);
    971 }
    972 
    973 void
    974 rfs4_client_destroy(rfs4_entry_t u_entry)
    975 {
    976 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
    977 	nfs_server_instance_t *instp;
    978 
    979 	instp = dbe_to_instp(cp->rc_dbe);
    980 
    981 	mutex_destroy(cp->rc_cbinfo.cb_lock);
    982 	cv_destroy(cp->rc_cbinfo.cb_cv);
    983 	cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
    984 
    985 	/* free callback info */
    986 	rfs4_cbinfo_free(&cp->rc_cbinfo);
    987 
    988 	if (cp->rc_cp_confirmed)
    989 		rfs4_client_rele(cp->rc_cp_confirmed);
    990 
    991 	if (cp->rc_ss_pn) {
    992 		/* check if the stable storage files need to be removed */
    993 		if (cp->rc_ss_remove) {
    994 			rfs4_ss_delete_client(instp, cp->rc_ss_pn->leaf);
    995 		}
    996 		rfs4_ss_pnfree(cp->rc_ss_pn);
    997 	}
    998 
    999 	/* if this is a 4.1 client, clean up it's sessions */
   1000 	if (instp->inst_flags & NFS_INST_v41) {
   1001 		mds_clean_up_sessions(cp);
   1002 		mds_clean_up_grants(cp);
   1003 		mds_clean_up_trunkinfo(cp);
   1004 	}
   1005 
   1006 	/* Free the client supplied client id */
   1007 	kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
   1008 
   1009 	if (cp->rc_sysidt != LM_NOSYSID)
   1010 		lm_free_sysidt(cp->rc_sysidt);
   1011 }
   1012 
/*
 * Initialise a freshly allocated client entry.
 *
 * u_entry is the rfs4_client_t to fill in; arg is the nfs_client_id4
 * supplied by the client, whose id bytes and verifier are copied into
 * the entry.  Always returns TRUE.
 */
bool_t
rfs4_client_create(rfs4_entry_t u_entry, void *arg)
{
	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
	nfs_client_id4 *client = (nfs_client_id4 *)arg;
	cid *cidp;
	scid_confirm_verf *scvp;
	int	i;

	/*
	 * Get a clientid to give to the client: the instance start time
	 * combined with this entry's dbe id.
	 */
	cidp = (cid *)&cp->rc_clientid;
	cidp->impl_id.start_time = cp->rc_dbe->dbe_table->dbt_instp->start_time;
	cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);

	/* If we are booted as a cluster node, embed our nodeid */
	if (cluster_bootflags & CLUSTER_BOOTED)
		embed_nodeid(cidp);

	/* Allocate and copy client's client id value */
	cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
	cp->rc_nfs_client.id_len = client->id_len;
	bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
	cp->rc_nfs_client.verifier = client->verifier;

	/* Init the value for the verifier */
	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
	scvp->cv_impl.c_id = cidp->impl_id.c_id;
	scvp->cv_impl.gen_num = 0;

	/* No F_UNLKSYS has been done for this client yet */
	cp->rc_unlksys_completed = FALSE;

	/* We need the client to ack us */
	cp->rc_need_confirm = TRUE;
	cp->rc_cp_confirmed = NULL;

	/* TRUE all the time until the callback path actually fails */
	cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;

	/* Initialize the access time to now */
	cp->rc_last_access = gethrestime_sec();

	cp->rc_cr_set = NULL;

	cp->rc_sysidt = LM_NOSYSID;

	list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
	    offsetof(rfs4_openowner_t, ro_node));

	/* Init client grant list for remque/insque */
	cp->rc_clientgrantlist.next = cp->rc_clientgrantlist.prev =
	    &cp->rc_clientgrantlist;
	cp->rc_clientgrantlist.lg = NULL;

	cp->rc_bulk_recall = 0;

	/* set up the callback control structure */
	cp->rc_cbinfo.cb_state = CB_UNINIT;
	mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
	cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);

	/*
	 * NFSv4.1: See draft-07, Section 16.36.5
	 */
	cp->rc_contrived.xi_sid = 1;
	cp->rc_contrived.cs_slot.seqid = 0;
	cp->rc_contrived.cs_slot.status = NFS4ERR_SEQ_MISORDERED;

	/*
	 * only initialize bits relevant to client scope
	 *
	 * i walks the single-bit values 1, 2, 4, ... up to SEQ4_HIGH_BIT
	 * (the i != 0 test stops the loop should the shift wrap to
	 * zero); log2(i) gives the slot in rc_seq4 for that bit.
	 */
	bzero(&cp->rc_seq4, sizeof (bit_attr_t) * BITS_PER_WORD);
	for (i = 1; i <= SEQ4_HIGH_BIT && i != 0; i <<= 1) {
		uint32_t idx = log2(i);

		switch (i) {
		case SEQ4_STATUS_CB_PATH_DOWN:
		case SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED:
		case SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED:
		case SEQ4_STATUS_ADMIN_STATE_REVOKED:
		case SEQ4_STATUS_RECALLABLE_STATE_REVOKED:
		case SEQ4_STATUS_LEASE_MOVED:
		case SEQ4_STATUS_RESTART_RECLAIM_NEEDED:
		case SEQ4_STATUS_DEVID_CHANGED:
		case SEQ4_STATUS_DEVID_DELETED:
			cp->rc_seq4[idx].ba_bit = i;
			break;
		default:
			/* already bzero'ed */
			break;
		}
	}

	list_create(&cp->rc_trunkinfo, sizeof (rfs41_tie_t),
	    offsetof(rfs41_tie_t, t_link));
	return (TRUE);
}
   1109 
   1110 /*
   1111  * Caller wants to generate/update the setclientid_confirm verifier
   1112  * associated with a client.  This is done during the SETCLIENTID
   1113  * processing.
   1114  */
   1115 void
   1116 rfs4_client_scv_next(rfs4_client_t *cp)
   1117 {
   1118 	scid_confirm_verf *scvp;
   1119 
   1120 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
   1121 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
   1122 	scvp->cv_impl.gen_num++;
   1123 }
   1124 
/*
 * Release a reference on a client by releasing its underlying dbe.
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
   1130 
   1131 /*
   1132  *  Find an rfs4_client
   1133  */
   1134 rfs4_client_t *
   1135 findclient(nfs_server_instance_t *instp,
   1136 	nfs_client_id4 *client,
   1137 	bool_t *create,
   1138 	rfs4_client_t *oldcp)
   1139 {
   1140 	rfs4_client_t *cp;
   1141 
   1142 	if (oldcp) {
   1143 		rw_enter(&instp->findclient_lock, RW_WRITER);
   1144 		rfs4_dbe_hide(oldcp->rc_dbe);
   1145 	} else {
   1146 		rw_enter(&instp->findclient_lock, RW_READER);
   1147 	}
   1148 
   1149 	cp = (rfs4_client_t *)rfs4_dbsearch(instp->nfsclnt_idx, client,
   1150 	    create, (void *)client, RFS4_DBS_VALID);
   1151 
   1152 	if (oldcp)
   1153 		rfs4_dbe_unhide(oldcp->rc_dbe);
   1154 
   1155 	rw_exit(&instp->findclient_lock);
   1156 
   1157 	return (cp);
   1158 }
   1159 
   1160 /*
   1161  * Find an rfs4_client via the ID.
   1162  */
   1163 rfs4_client_t *
   1164 findclient_by_id(nfs_server_instance_t *instp, clientid4 clientid)
   1165 {
   1166 	rfs4_client_t *cp;
   1167 	bool_t create = FALSE;
   1168 
   1169 	rw_enter(&instp->findclient_lock, RW_READER);
   1170 
   1171 	cp = (rfs4_client_t *)rfs4_dbsearch(instp->clientid_idx, &clientid,
   1172 	    &create, NULL, RFS4_DBS_VALID);
   1173 
   1174 	rw_exit(&instp->findclient_lock);
   1175 
   1176 	return (cp);
   1177 }
   1178 
   1179 rfs4_client_t *
   1180 rfs4_findclient_by_id(nfs_server_instance_t *instp, clientid4 clientid,
   1181     bool_t find_unconfirmed)
   1182 {
   1183 	rfs4_client_t *cp;
   1184 	cid *cidp = (cid *)&clientid;
   1185 
   1186 	/* If we're a cluster and the nodeid isn't right, short-circuit */
   1187 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
   1188 		return (NULL);
   1189 
   1190 	cp = findclient_by_id(instp, clientid);
   1191 
   1192 	if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
   1193 		rfs4_client_rele(cp);
   1194 		return (NULL);
   1195 	}
   1196 	return (cp);
   1197 }
   1198 
   1199 /*
   1200  * Evaluate if the lease for this client has expired.
   1201  */
   1202 bool_t
   1203 rfs4_lease_expired(rfs4_client_t *cp)
   1204 {
   1205 	bool_t rc;
   1206 
   1207 	rfs4_dbe_lock(cp->rc_dbe);
   1208 
   1209 	/*
   1210 	 * If the admin has executed clear_locks for this
   1211 	 * client id, force expire will be set, so no need
   1212 	 * to calculate anything because it's "outa here".
   1213 	 */
   1214 	if (cp->rc_forced_expire) {
   1215 		rc = TRUE;
   1216 	} else {
   1217 		if (cp->rc_clid_scope) {
   1218 			rc = FALSE;
   1219 		} else {
   1220 			rc = (gethrestime_sec() - cp->rc_last_access >
   1221 			    dbe_to_instp(cp->rc_dbe)->lease_period);
   1222 		}
   1223 	}
   1224 
   1225 	/*
   1226 	 * If the lease has expired we will also want
   1227 	 * to remove any stable storage state data. So
   1228 	 * mark the client id accordingly.
   1229 	 */
   1230 	if (!cp->rc_ss_remove)
   1231 		cp->rc_ss_remove = (rc == TRUE);
   1232 
   1233 	rfs4_dbe_unlock(cp->rc_dbe);
   1234 
   1235 	return (rc);
   1236 }
   1237 
   1238 void
   1239 rfs4_update_lease(rfs4_client_t *cp)
   1240 {
   1241 	rfs4_dbe_lock(cp->rc_dbe);
   1242 	if (!cp->rc_forced_expire)
   1243 		cp->rc_last_access = gethrestime_sec();
   1244 	rfs4_dbe_unlock(cp->rc_dbe);
   1245 }
   1246 
/*
 * Release the state's dbe only; unlike rfs4_state_rele() the file's
 * rw lock is not touched.
 */
void
rfs4_state_rele_nounlock(rfs4_state_t *sp)
{
	rfs4_dbe_rele(sp->rs_dbe);
}
   1252 
/*
 * Drop the rw lock of the file this state refers to (rs_finfo) and
 * then release the state's dbe.
 */
void
rfs4_state_rele(rfs4_state_t *sp)
{
	rw_exit(&sp->rs_finfo->rf_file_rwlock);
	rfs4_dbe_rele(sp->rs_dbe);
}
   1259 
   1260 /*
   1261  * Open Owners:
   1262  */
   1263 uint_t
   1264 openowner_hash(void *key)
   1265 {
   1266 	int i;
   1267 	open_owner4 *openowner = key;
   1268 	uint_t hash = 0;
   1269 
   1270 	for (i = 0; i < openowner->owner_len; i++) {
   1271 		hash <<= 4;
   1272 		hash += (uint_t)openowner->owner_val[i];
   1273 	}
   1274 	hash += (uint_t)openowner->clientid;
   1275 	hash |= (openowner->clientid >> 32);
   1276 
   1277 	return (hash);
   1278 }
   1279 
   1280 bool_t
   1281 openowner_compare(rfs4_entry_t u_entry, void *key)
   1282 {
   1283 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
   1284 	open_owner4 *arg = key;
   1285 	bool_t rc;
   1286 
   1287 	if (oo->ro_owner.clientid != arg->clientid)
   1288 		return (FALSE);
   1289 
   1290 	if (oo->ro_owner.owner_len != arg->owner_len)
   1291 		return (FALSE);
   1292 
   1293 	rc = (bcmp(oo->ro_owner.owner_val,
   1294 	    arg->owner_val, arg->owner_len) == 0);
   1295 
   1296 	return (rc);
   1297 }
   1298 
   1299 void *
   1300 openowner_mkkey(rfs4_entry_t u_entry)
   1301 {
   1302 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
   1303 
   1304 	return (&oo->ro_owner);
   1305 }
   1306 
   1307 bool_t
   1308 rfs4_openowner_expiry(rfs4_entry_t u_entry)
   1309 {
   1310 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
   1311 
   1312 	if (rfs4_dbe_is_invalid(oo->ro_dbe))
   1313 		return (TRUE);
   1314 	return ((gethrestime_sec() - oo->ro_client->rc_last_access
   1315 	    > dbe_to_instp(oo->ro_dbe)->lease_period));
   1316 }
   1317 
   1318 void
   1319 openowner_destroy(rfs4_entry_t u_entry)
   1320 {
   1321 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
   1322 
   1323 	/* Remove open owner from client's lists of open owners */
   1324 	rfs4_dbe_lock(oo->ro_client->rc_dbe);
   1325 	list_remove(&oo->ro_client->rc_openownerlist, oo);
   1326 	rfs4_dbe_unlock(oo->ro_client->rc_dbe);
   1327 
   1328 	/* One less reference to the client */
   1329 	rfs4_client_rele(oo->ro_client);
   1330 	oo->ro_client = NULL;
   1331 
   1332 	/* Free the last reply for this lock owner */
   1333 	rfs4_free_reply(oo->ro_reply);
   1334 
   1335 	if (oo->ro_reply_fh.nfs_fh4_val) {
   1336 		kmem_free(oo->ro_reply_fh.nfs_fh4_val,
   1337 		    oo->ro_reply_fh.nfs_fh4_len);
   1338 		oo->ro_reply_fh.nfs_fh4_val = NULL;
   1339 		oo->ro_reply_fh.nfs_fh4_len = 0;
   1340 	}
   1341 
   1342 	rfs4_sw_destroy(&oo->ro_sw);
   1343 	list_destroy(&oo->ro_statelist);
   1344 
   1345 	/* Free the lock owner id */
   1346 	kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
   1347 }
   1348 
/*
 * Release the open owner's underlying dbe.
 */
void
rfs4_openowner_rele(rfs4_openowner_t *oo)
{
	rfs4_dbe_rele(oo->ro_dbe);
}
   1354 
   1355 bool_t
   1356 openowner_create(rfs4_entry_t u_entry, void *arg)
   1357 {
   1358 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
   1359 	rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
   1360 	open_owner4 *openowner = &argp->ro_owner;
   1361 	seqid4 seqid = argp->ro_open_seqid;
   1362 	rfs4_client_t *cp;
   1363 	bool_t create = FALSE;
   1364 	nfs_server_instance_t *instp;
   1365 
   1366 	instp = dbe_to_instp(oo->ro_dbe);
   1367 
   1368 	rw_enter(&instp->findclient_lock, RW_READER);
   1369 
   1370 	cp = (rfs4_client_t *)rfs4_dbsearch(instp->clientid_idx,
   1371 	    &openowner->clientid,
   1372 	    &create, NULL, RFS4_DBS_VALID);
   1373 
   1374 	rw_exit(&instp->findclient_lock);
   1375 
   1376 	if (cp == NULL)
   1377 		return (FALSE);
   1378 
   1379 	oo->ro_reply_fh.nfs_fh4_len = 0;
   1380 	oo->ro_reply_fh.nfs_fh4_val = NULL;
   1381 
   1382 	oo->ro_owner.clientid = openowner->clientid;
   1383 	oo->ro_owner.owner_val =
   1384 	    kmem_alloc(openowner->owner_len, KM_SLEEP);
   1385 
   1386 	bcopy(openowner->owner_val,
   1387 	    oo->ro_owner.owner_val, openowner->owner_len);
   1388 
   1389 	oo->ro_owner.owner_len = openowner->owner_len;
   1390 
   1391 	oo->ro_need_confirm = TRUE;
   1392 
   1393 	rfs4_sw_init(&oo->ro_sw);
   1394 
   1395 	oo->ro_open_seqid = seqid;
   1396 	bzero(&oo->ro_reply, sizeof (nfs_resop4));
   1397 	oo->ro_client = cp;
   1398 	oo->ro_cr_set = NULL;
   1399 
   1400 	list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
   1401 	    offsetof(rfs4_state_t, rs_node));
   1402 
   1403 	/* Insert openowner into client's open owner list */
   1404 	rfs4_dbe_lock(cp->rc_dbe);
   1405 	list_insert_tail(&cp->rc_openownerlist, oo);
   1406 	rfs4_dbe_unlock(cp->rc_dbe);
   1407 
   1408 	return (TRUE);
   1409 }
   1410 
   1411 rfs4_openowner_t *
   1412 rfs4_findopenowner(nfs_server_instance_t *instp,
   1413     open_owner4 *openowner, bool_t *create, seqid4 seqid)
   1414 {
   1415 	rfs4_openowner_t *oo;
   1416 	rfs4_openowner_t arg;
   1417 
   1418 	arg.ro_owner = *openowner;
   1419 	arg.ro_open_seqid = seqid;
   1420 	oo = (rfs4_openowner_t *)rfs4_dbsearch(instp->openowner_idx,
   1421 	    openowner, create, &arg, RFS4_DBS_VALID);
   1422 
   1423 	return (oo);
   1424 }
   1425 
   1426 /* !!! NFSv4.0 ONLY !!! */
   1427 void
   1428 rfs4_update_open_sequence(rfs4_openowner_t *oo)
   1429 {
   1430 
   1431 	ASSERT(!(dbe_to_instp(oo->ro_dbe)->inst_flags & NFS_INST_v41));
   1432 
   1433 	rfs4_dbe_lock(oo->ro_dbe);
   1434 
   1435 	oo->ro_open_seqid++;
   1436 
   1437 	rfs4_dbe_unlock(oo->ro_dbe);
   1438 }
   1439 
   1440 void
   1441 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
   1442 {
   1443 	ASSERT(!(dbe_to_instp(oo->ro_dbe)->inst_flags & NFS_INST_v41));
   1444 
   1445 	rfs4_dbe_lock(oo->ro_dbe);
   1446 
   1447 	rfs4_free_reply(oo->ro_reply);
   1448 
   1449 	rfs4_copy_reply(oo->ro_reply, resp);
   1450 
   1451 	/* Save the filehandle if provided and free if not used */
   1452 	if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
   1453 	    fh && fh->nfs_fh4_len) {
   1454 		if (oo->ro_reply_fh.nfs_fh4_val == NULL)
   1455 			oo->ro_reply_fh.nfs_fh4_val =
   1456 			    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
   1457 		nfs_fh4_copy(fh, &oo->ro_reply_fh);
   1458 	} else {
   1459 		if (oo->ro_reply_fh.nfs_fh4_val) {
   1460 			kmem_free(oo->ro_reply_fh.nfs_fh4_val,
   1461 			    oo->ro_reply_fh.nfs_fh4_len);
   1462 			oo->ro_reply_fh.nfs_fh4_val = NULL;
   1463 			oo->ro_reply_fh.nfs_fh4_len = 0;
   1464 		}
   1465 	}
   1466 
   1467 	rfs4_dbe_unlock(oo->ro_dbe);
   1468 }
   1469 
   1470 /*
   1471  * Lock Owner:
   1472  */
   1473 bool_t
   1474 lockowner_compare(rfs4_entry_t u_entry, void *key)
   1475 {
   1476 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1477 	lock_owner4 *b = (lock_owner4 *)key;
   1478 
   1479 	if (lo->rl_owner.clientid != b->clientid)
   1480 		return (FALSE);
   1481 
   1482 	if (lo->rl_owner.owner_len != b->owner_len)
   1483 		return (FALSE);
   1484 
   1485 	return (bcmp(lo->rl_owner.owner_val, b->owner_val,
   1486 	    lo->rl_owner.owner_len) == 0);
   1487 }
   1488 
   1489 void *
   1490 lockowner_mkkey(rfs4_entry_t u_entry)
   1491 {
   1492 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1493 
   1494 	return (&lo->rl_owner);
   1495 }
   1496 
   1497 uint32_t
   1498 lockowner_hash(void *key)
   1499 {
   1500 	int i;
   1501 	lock_owner4 *lockowner = key;
   1502 	uint_t hash = 0;
   1503 
   1504 	for (i = 0; i < lockowner->owner_len; i++) {
   1505 		hash <<= 4;
   1506 		hash += (uint_t)lockowner->owner_val[i];
   1507 	}
   1508 	hash += (uint_t)lockowner->clientid;
   1509 	hash |= (lockowner->clientid >> 32);
   1510 
   1511 	return (hash);
   1512 }
   1513 
   1514 uint32_t
   1515 pid_hash(void *key)
   1516 {
   1517 	return ((uint32_t)(uintptr_t)key);
   1518 }
   1519 
   1520 void *
   1521 pid_mkkey(rfs4_entry_t u_entry)
   1522 {
   1523 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1524 
   1525 	return ((void *)(uintptr_t)lo->rl_pid);
   1526 }
   1527 
   1528 bool_t
   1529 pid_compare(rfs4_entry_t u_entry, void *key)
   1530 {
   1531 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1532 
   1533 	return (lo->rl_pid == (pid_t)(uintptr_t)key);
   1534 }
   1535 
   1536 void
   1537 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
   1538 {
   1539 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1540 
   1541 	/* Free the lock owner id */
   1542 	kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
   1543 	rfs4_client_rele(lo->rl_client);
   1544 }
   1545 
/*
 * Release the lock owner's underlying dbe.
 */
void
rfs4_lockowner_rele(rfs4_lockowner_t *lo)
{
	rfs4_dbe_rele(lo->rl_dbe);
}
   1551 
/*
 * Expiry check for lock owner entries.  The entry argument is not
 * examined; the answer is always TRUE.
 */
/* ARGSUSED */
bool_t
rfs4_lockowner_expiry(rfs4_entry_t u_entry)
{
	/*
	 * Since expiry is called with no other references on
	 * this struct, go ahead and have it removed.
	 */
	return (TRUE);
}
   1562 
   1563 bool_t
   1564 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
   1565 {
   1566 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
   1567 	lock_owner4 *lockowner = (lock_owner4 *)arg;
   1568 	rfs4_client_t *cp;
   1569 	bool_t create = FALSE;
   1570 	nfs_server_instance_t *instp;
   1571 
   1572 	instp = dbe_to_instp(lo->rl_dbe);
   1573 
   1574 	rw_enter(&instp->findclient_lock, RW_READER);
   1575 
   1576 	cp = (rfs4_client_t *)rfs4_dbsearch(instp->clientid_idx,
   1577 	    &lockowner->clientid,
   1578 	    &create, NULL, RFS4_DBS_VALID);
   1579 
   1580 	rw_exit(&instp->findclient_lock);
   1581 
   1582 	if (cp == NULL)
   1583 		return (FALSE);
   1584 
   1585 	/* Reference client */
   1586 	lo->rl_client = cp;
   1587 	lo->rl_owner.clientid = lockowner->clientid;
   1588 	lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
   1589 	bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
   1590 	    lockowner->owner_len);
   1591 	lo->rl_owner.owner_len = lockowner->owner_len;
   1592 	lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
   1593 
   1594 	return (TRUE);
   1595 }
   1596 
   1597 
   1598 rfs4_lockowner_t *
   1599 findlockowner(nfs_server_instance_t *instp, lock_owner4 *lockowner,
   1600 	    bool_t *create)
   1601 {
   1602 	rfs4_lockowner_t *lo;
   1603 
   1604 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(instp->lockowner_idx,
   1605 	    lockowner, create, lockowner,
   1606 	    RFS4_DBS_VALID);
   1607 
   1608 	return (lo);
   1609 }
   1610 
   1611 
   1612 rfs4_lockowner_t *
   1613 findlockowner_by_pid(nfs_server_instance_t *instp, pid_t pid)
   1614 {
   1615 	rfs4_lockowner_t *lo;
   1616 	bool_t create = FALSE;
   1617 
   1618 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(instp->lockowner_pid_idx,
   1619 	    (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
   1620 
   1621 	return (lo);
   1622 }
   1623 
/*
 * rfs4_file:
 */

/*
 * Hash a file key by applying ADDRHASH to the key pointer itself.
 * (file_mkkey() returns the entry's vnode pointer as the key.)
 */
uint32_t
file_hash(void *key)
{
	return (ADDRHASH(key));
}
   1632 
   1633 void *
   1634 file_mkkey(rfs4_entry_t u_entry)
   1635 {
   1636 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
   1637 
   1638 	return (fp->rf_vp);
   1639 }
   1640 
   1641 bool_t
   1642 file_compare(rfs4_entry_t u_entry, void *key)
   1643 {
   1644 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
   1645 
   1646 	return (fp->rf_vp == (vnode_t *)key);
   1647 }
   1648 
   1649 void
   1650 rfs4_file_destroy(rfs4_entry_t u_entry)
   1651 {
   1652 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
   1653 
   1654 	if (fp->rf_mlo) {
   1655 		rfs4_dbe_rele(fp->rf_mlo->mlo_dbe);
   1656 		fp->rf_mlo = NULL;
   1657 	}
   1658 
   1659 	list_destroy(&fp->rf_delegstatelist);
   1660 
   1661 	if (fp->rf_filehandle.nfs_fh4_val)
   1662 		kmem_free(fp->rf_filehandle.nfs_fh4_val,
   1663 		    fp->rf_filehandle.nfs_fh4_len);
   1664 	cv_destroy(fp->rf_dinfo->rd_recall_cv);
   1665 	if (fp->rf_vp) {
   1666 		vnode_t *vp = fp->rf_vp;
   1667 		nfs_server_instance_t *instp;
   1668 
   1669 		instp = dbe_to_instp(fp->rf_dbe);
   1670 		mutex_enter(&vp->v_vsd_lock);
   1671 		(void) vsd_set(vp, instp->vkey, NULL);
   1672 		mutex_exit(&vp->v_vsd_lock);
   1673 		VN_RELE(vp);
   1674 		fp->rf_vp = NULL;
   1675 	}
   1676 	rw_destroy(&fp->rf_file_rwlock);
   1677 }
   1678 
/*
 * Used to release the underlying dbe struct only; the file's rw lock
 * is not touched (see rfs4_file_rele_withunlock() for that).
 */
void
rfs4_file_rele(rfs4_file_t *fp)
{
	rfs4_dbe_rele(fp->rf_dbe);
}
   1687 
/*
 * Used to unlock the file rw lock and then release the file's dbe
 * entry.
 * Only used to pair with rfs4_findfile_withlock()
 */
void
rfs4_file_rele_withunlock(rfs4_file_t *fp)
{
	rw_exit(&fp->rf_file_rwlock);
	rfs4_dbe_rele(fp->rf_dbe);
}
   1698 
/*
 * Argument bundle handed to rfs4_file_create() through rfs4_dbsearch().
 */
typedef struct {
    vnode_t *vp;	/* vnode the new file entry is created for */
    nfs_fh4 *fh;	/* filehandle copied into the new file entry */
} rfs4_fcreate_arg;
   1703 
   1704 /* ARGSUSED */
   1705 bool_t
   1706 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
   1707 {
   1708 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
   1709 	rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
   1710 	vnode_t *vp = ap->vp;
   1711 	nfs_fh4 *fh = ap->fh;
   1712 	nfs_server_instance_t *instp;
   1713 
   1714 	instp = dbe_to_instp(fp->rf_dbe);
   1715 
   1716 	VN_HOLD(vp);
   1717 
   1718 	fp->rf_filehandle.nfs_fh4_len = 0;
   1719 	fp->rf_filehandle.nfs_fh4_val = NULL;
   1720 	ASSERT(fh && fh->nfs_fh4_len);
   1721 	if (fh && fh->nfs_fh4_len) {
   1722 		fp->rf_filehandle.nfs_fh4_val =
   1723 		    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
   1724 		nfs_fh4_copy(fh, &fp->rf_filehandle);
   1725 	}
   1726 	fp->rf_vp = vp;
   1727 
   1728 	list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
   1729 	    offsetof(rfs4_deleg_state_t, rds_node));
   1730 
   1731 	/* Init layout grant list for remque/insque */
   1732 	fp->rf_lo_grant_list.next = fp->rf_lo_grant_list.prev =
   1733 	    &fp->rf_lo_grant_list;
   1734 	fp->rf_lo_grant_list.lg = NULL;
   1735 
   1736 	fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
   1737 	fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
   1738 
   1739 	mutex_init(fp->rf_dinfo->rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
   1740 	cv_init(fp->rf_dinfo->rd_recall_cv, NULL, CV_DEFAULT, NULL);
   1741 
   1742 	fp->rf_dinfo->rd_dtype = OPEN_DELEGATE_NONE;
   1743 
   1744 	rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
   1745 
   1746 	mutex_enter(&vp->v_vsd_lock);
   1747 	VERIFY(vsd_set(vp, instp->vkey, (void *)fp) == 0);
   1748 	mutex_exit(&vp->v_vsd_lock);
   1749 
   1750 	return (TRUE);
   1751 }
   1752 
   1753 rfs4_file_t *
   1754 rfs4_findfile(nfs_server_instance_t *instp, vnode_t *vp, nfs_fh4 *fh,
   1755 	    bool_t *create)
   1756 {
   1757 	rfs4_file_t *fp;
   1758 	rfs4_fcreate_arg arg;
   1759 
   1760 	arg.vp = vp;
   1761 	arg.fh = fh;
   1762 
   1763 	if (*create == TRUE)
   1764 		fp = (rfs4_file_t *)rfs4_dbsearch(instp->file_idx, vp,
   1765 		    create, &arg, RFS4_DBS_VALID);
   1766 	else {
   1767 		mutex_enter(&vp->v_vsd_lock);
   1768 		fp = (rfs4_file_t *)vsd_get(vp, instp->vkey);
   1769 		if (fp) {
   1770 			rfs4_dbe_lock(fp->rf_dbe);
   1771 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
   1772 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
   1773 				rfs4_dbe_unlock(fp->rf_dbe);
   1774 				fp = NULL;
   1775 			} else {
   1776 				rfs4_dbe_hold(fp->rf_dbe);
   1777 				rfs4_dbe_unlock(fp->rf_dbe);
   1778 			}
   1779 		}
   1780 		mutex_exit(&vp->v_vsd_lock);
   1781 	}
   1782 	return (fp);
   1783 }
   1784 
/*
 * Find a file in the db and once it is located, take the rw lock
 * (as writer).
 * Need to check the vnode pointer and if it does not exist (it was
 * removed between the db location and check) redo the find.  This
 * assumes that a file struct that has a NULL vnode pointer is marked
 * as 'invalid' and will not be found in the db the second time
 * around.
 *
 * When *create is FALSE on entry the entry is fetched from the vnode
 * specific data instead of the db, and a NULL vnode pointer simply
 * yields NULL (no retry).
 *
 * Pair a successful return with rfs4_file_rele_withunlock().
 */
rfs4_file_t *
rfs4_findfile_withlock(nfs_server_instance_t *instp, vnode_t *vp, nfs_fh4 *fh,
	    bool_t *create)
{
	rfs4_file_t *fp;
	rfs4_fcreate_arg arg;
	bool_t screate = *create;	/* saved so a retry can restore it */

	if (screate == FALSE) {
		mutex_enter(&vp->v_vsd_lock);
		fp = (rfs4_file_t *)vsd_get(vp, instp->vkey);
		if (fp) {
			rfs4_dbe_lock(fp->rf_dbe);
			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
				rfs4_dbe_unlock(fp->rf_dbe);
				mutex_exit(&vp->v_vsd_lock);
				fp = NULL;
			} else {
				rfs4_dbe_hold(fp->rf_dbe);
				rfs4_dbe_unlock(fp->rf_dbe);
				/* v_vsd_lock is dropped before rw_enter */
				mutex_exit(&vp->v_vsd_lock);
				rw_enter(&fp->rf_file_rwlock, RW_WRITER);
				if (fp->rf_vp == NULL) {
					rw_exit(&fp->rf_file_rwlock);
					rfs4_file_rele(fp);
					fp = NULL;
				}
			}
		} else {
			mutex_exit(&vp->v_vsd_lock);
		}
	} else {
retry:
		arg.vp = vp;
		arg.fh = fh;

		fp = (rfs4_file_t *)rfs4_dbsearch(instp->file_idx, vp,
		    create, &arg, RFS4_DBS_VALID);
		if (fp != NULL) {
			rw_enter(&fp->rf_file_rwlock, RW_WRITER);
			if (fp->rf_vp == NULL) {
				/* vnode went away: drop this entry, retry */
				rw_exit(&fp->rf_file_rwlock);
				rfs4_file_rele(fp);
				*create = screate;
				goto retry;
			}
		}
	}

	return (fp);
}
   1845 
   1846 uint32_t
   1847 lo_state_hash(void *key)
   1848 {
   1849 	stateid_t *id = key;
   1850 
   1851 	return (id->v4_bits.state_ident+id->v4_bits.pid);
   1852 }
   1853 
   1854 bool_t
   1855 lo_state_compare(rfs4_entry_t u_entry, void *key)
   1856 {
   1857 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   1858 	stateid_t *id = key;
   1859 	bool_t rc;
   1860 
   1861 	rc = (lsp->rls_lockid.v4_bits.boottime == id->v4_bits.boottime &&
   1862 	    lsp->rls_lockid.v4_bits.type == id->v4_bits.type &&
   1863 	    lsp->rls_lockid.v4_bits.state_ident == id->v4_bits.state_ident &&
   1864 	    lsp->rls_lockid.v4_bits.pid == id->v4_bits.pid);
   1865 
   1866 	return (rc);
   1867 }
   1868 
   1869 void *
   1870 lo_state_mkkey(rfs4_entry_t u_entry)
   1871 {
   1872 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   1873 
   1874 	return (&lsp->rls_lockid);
   1875 }
   1876 
   1877 bool_t
   1878 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
   1879 {
   1880 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   1881 
   1882 	if (rfs4_dbe_is_invalid(lsp->rls_dbe))
   1883 		return (TRUE);
   1884 	if (lsp->rls_state->rs_closed)
   1885 		return (TRUE);
   1886 	return ((gethrestime_sec() -
   1887 	    lsp->rls_state->rs_owner->ro_client->rc_last_access
   1888 	    > dbe_to_instp(lsp->rls_dbe)->lease_period));
   1889 }
   1890 
/*
 * Tear down a lock state entry: unlink it from its open state's list
 * of lock states, destroy rls_sw, release any file locks that have not
 * been cleaned yet, free the cached reply and drop the references on
 * the lock owner and the open state.
 */
void
rfs4_lo_state_destroy(rfs4_entry_t u_entry)
{
	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;

	/* Unlink from the open state's list, under the open state's lock */
	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);

	rfs4_sw_destroy(&lsp->rls_sw);

	/* Make sure to release the file locks */
	if (lsp->rls_locks_cleaned == FALSE) {
		lsp->rls_locks_cleaned = TRUE;
		/* Nothing to clean when the client never got a sysid */
		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
			/* Is the PxFS kernel module loaded? */
			if (lm_remove_file_locks != NULL) {
				int new_sysid;

				/* Encode the cluster nodeid in new sysid */
				new_sysid =
				    lsp->rls_locker->rl_client->rc_sysidt;
				lm_set_nlmid_flk(&new_sysid);

				/*
				 * This PxFS routine removes file locks for a
				 * client over all nodes of a cluster.
				 */
				DTRACE_PROBE1(nfss_i_clust_rm_lck,
				    int, new_sysid);
				(*lm_remove_file_locks)(new_sysid);
			} else {
				/* Local case: clean by vnode, pid and sysid */
				(void) cleanlocks(
				    lsp->rls_state->rs_finfo->rf_vp,
				    lsp->rls_locker->rl_pid,
				    lsp->rls_locker->rl_client->rc_sysidt);
			}
		}
	}

	/* Free the last reply for this state */
	rfs4_free_reply(&lsp->rls_reply);

	rfs4_lockowner_rele(lsp->rls_locker);
	lsp->rls_locker = NULL;

	/* The open state's file rw lock is not dropped here */
	rfs4_state_rele_nounlock(lsp->rls_state);
	lsp->rls_state = NULL;
}
   1940 
   1941 /* ARGSUSED */
   1942 bool_t
   1943 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
   1944 {
   1945 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   1946 	rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
   1947 	rfs4_lockowner_t *lo = argp->rls_locker;
   1948 	rfs4_state_t *sp = argp->rls_state;
   1949 
   1950 	lsp->rls_state = sp;
   1951 
   1952 	lsp->rls_lockid = sp->rs_stateid;
   1953 	lsp->rls_lockid.v4_bits.type = LOCKID;
   1954 	lsp->rls_lockid.v4_bits.chgseq = 0;
   1955 	lsp->rls_lockid.v4_bits.pid = lo->rl_pid;
   1956 
   1957 	lsp->rls_locks_cleaned = FALSE;
   1958 	lsp->rls_lock_completed = FALSE;
   1959 
   1960 	rfs4_sw_init(&lsp->rls_sw);
   1961 
   1962 	/* Attached the supplied lock owner */
   1963 	rfs4_dbe_hold(lo->rl_dbe);
   1964 	lsp->rls_locker = lo;
   1965 
   1966 	rfs4_dbe_lock(sp->rs_dbe);
   1967 	list_insert_tail(&sp->rs_lostatelist, lsp);
   1968 	rfs4_dbe_hold(sp->rs_dbe);
   1969 	rfs4_dbe_unlock(sp->rs_dbe);
   1970 
   1971 	return (TRUE);
   1972 }
   1973 
/*
 * Release a lock state's dbe.  When unlock_fp is TRUE the rw lock of
 * the file behind the lock state's open state is dropped first.
 */
void
rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
{
	if (unlock_fp == TRUE)
		rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
	rfs4_dbe_rele(lsp->rls_dbe);
}
   1981 
   1982 rfs4_lo_state_t *
   1983 rfs4_findlo_state(struct compound_state *cs,
   1984 		stateid_t *id, bool_t lock_fp)
   1985 {
   1986 	rfs4_lo_state_t *lsp;
   1987 	bool_t create = FALSE;
   1988 
   1989 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(cs->instp->lo_state_idx, id,
   1990 	    &create, NULL, RFS4_DBS_VALID);
   1991 	if (lock_fp == TRUE && lsp != NULL)
   1992 		rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
   1993 
   1994 	return (lsp);
   1995 }
   1996 
/*
 * Hash for the (lock owner, open state) index: the key is an
 * rfs4_lo_state_t and the hash is the XOR of ADDRHASH applied to its
 * rls_locker and rls_state pointers.
 */
uint32_t
lo_state_lo_hash(void *key)
{
	rfs4_lo_state_t *lsp = key;

	return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
}
   2004 
   2005 bool_t
   2006 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
   2007 {
   2008 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   2009 	rfs4_lo_state_t *keyp = key;
   2010 
   2011 	return (keyp->rls_locker == lsp->rls_locker &&
   2012 	    keyp->rls_state == lsp->rls_state);
   2013 }
   2014 
/*
 * For the (lock owner, open state) index the entry itself serves as
 * the key.
 */
void *
lo_state_lo_mkkey(rfs4_entry_t u_entry)
{
	return (u_entry);
}
   2020 
   2021 rfs4_lo_state_t *
   2022 rfs4_findlo_state_by_owner(nfs_server_instance_t *instp,
   2023     rfs4_lockowner_t *lo, rfs4_state_t *sp, bool_t *create)
   2024 {
   2025 	rfs4_lo_state_t *lsp;
   2026 	rfs4_lo_state_t arg;
   2027 
   2028 	arg.rls_locker = lo;
   2029 	arg.rls_state = sp;
   2030 
   2031 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(instp->lo_state_owner_idx,
   2032 	    &arg, create, &arg, RFS4_DBS_VALID);
   2033 
   2034 	return (lsp);
   2035 }
   2036 
   2037 rfs4_lo_state_t *
   2038 findlo_state_by_owner(rfs4_lockowner_t *lo,
   2039 			rfs4_state_t *sp, bool_t *create)
   2040 {
   2041 	rfs4_lo_state_t *lsp;
   2042 	rfs4_lo_state_t arg;
   2043 	nfs_server_instance_t *instp;
   2044 
   2045 	arg.rls_locker = lo;
   2046 	arg.rls_state = sp;
   2047 
   2048 	instp = dbe_to_instp(lo->rl_dbe);
   2049 
   2050 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(instp->lo_state_owner_idx,
   2051 	    &arg, create, &arg, RFS4_DBS_VALID);
   2052 
   2053 	return (lsp);
   2054 }
   2055 
   2056 static stateid_t
   2057 get_stateid(nfs_server_instance_t *instp, id_t eid, stateid_type_t id_type)
   2058 {
   2059 	stateid_t id;
   2060 
   2061 	id.v4_bits.boottime = instp->start_time;
   2062 	id.v4_bits.state_ident = eid;
   2063 	id.v4_bits.chgseq = 0;
   2064 	id.v4_bits.type = id_type;
   2065 	id.v4_bits.pid = 0;
   2066 
   2067 	/*
   2068 	 * If we are booted as a cluster node, embed our nodeid.
   2069 	 * We've already done sanity checks in rfs4_client_create() so no
   2070 	 * need to repeat them here.
   2071 	 */
   2072 	id.v4_bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
   2073 	    clconf_get_nodeid() : 0;
   2074 
   2075 	return (id);
   2076 }
   2077 
   2078 /*
   2079  * For use only when booted as a cluster node.
   2080  * Returns TRUE if the embedded nodeid indicates that this stateid was
   2081  * generated on another node.
   2082  */
   2083 static int
   2084 foreign_stateid(stateid_t *id)
   2085 {
   2086 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
   2087 	return (id->v4_bits.clnodeid != (uint32_t)clconf_get_nodeid());
   2088 }
   2089 
   2090 /*
   2091  * For use only when booted as a cluster node.
   2092  * Returns TRUE if the embedded nodeid indicates that this clientid was
   2093  * generated on another node.
   2094  */
   2095 static int
   2096 foreign_clientid(cid *cidp)
   2097 {
   2098 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
   2099 	return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
   2100 	    (uint32_t)clconf_get_nodeid());
   2101 }
   2102 
   2103 /*
   2104  * For use only when booted as a cluster node.
   2105  * Embed our cluster nodeid into the clientid.
   2106  */
   2107 static void
   2108 embed_nodeid(cid *cidp)
   2109 {
   2110 	int clnodeid;
   2111 	/*
   2112 	 * Currently, our state tables are small enough that their
   2113 	 * ids will leave enough bits free for the nodeid. If the
   2114 	 * tables become larger, we mustn't overwrite the id.
   2115 	 * Equally, we only have room for so many bits of nodeid, so
   2116 	 * must check that too.
   2117 	 */
   2118 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
   2119 	ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
   2120 	clnodeid = clconf_get_nodeid();
   2121 	ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
   2122 	ASSERT(clnodeid != NODEID_UNKNOWN);
   2123 	cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
   2124 }
   2125 
   2126 uint32_t
   2127 state_hash(void *key)
   2128 {
   2129 	stateid_t *ip = (stateid_t *)key;
   2130 
   2131 	return (ip->v4_bits.state_ident);
   2132 }
   2133 
   2134 bool_t
   2135 state_compare(rfs4_entry_t u_entry, void *key)
   2136 {
   2137 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2138 	stateid_t *id = (stateid_t *)key;
   2139 	bool_t rc;
   2140 
   2141 	rc = (sp->rs_stateid.v4_bits.boottime == id->v4_bits.boottime &&
   2142 	    sp->rs_stateid.v4_bits.state_ident == id->v4_bits.state_ident);
   2143 
   2144 	return (rc);
   2145 }
   2146 
   2147 void *
   2148 state_mkkey(rfs4_entry_t u_entry)
   2149 {
   2150 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2151 
   2152 	return (&sp->rs_stateid);
   2153 }
   2154 
   2155 void
   2156 rfs4_state_destroy(rfs4_entry_t u_entry)
   2157 {
   2158 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2159 
   2160 	/* remove from openowner list */
   2161 	rfs4_dbe_lock(sp->rs_owner->ro_dbe);
   2162 	list_remove(&sp->rs_owner->ro_statelist, sp);
   2163 	rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
   2164 
   2165 	list_destroy(&sp->rs_lostatelist);
   2166 
   2167 	/* release any share locks for this stateid if it's still open */
   2168 	if (!sp->rs_closed) {
   2169 		rfs4_dbe_lock(sp->rs_dbe);
   2170 		(void) rfs4_unshare(sp);
   2171 		rfs4_dbe_unlock(sp->rs_dbe);
   2172 	}
   2173 
   2174 	/* We are done with the file */
   2175 	rfs4_file_rele(sp->rs_finfo);
   2176 	sp->rs_finfo = NULL;
   2177 
   2178 	/* And now with the openowner */
   2179 	rfs4_openowner_rele(sp->rs_owner);
   2180 	sp->rs_owner = NULL;
   2181 }
   2182 
   2183 
   2184 uint32_t
   2185 deleg_hash(void *key)
   2186 {
   2187 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
   2188 
   2189 	return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
   2190 }
   2191 
   2192 bool_t
   2193 deleg_compare(rfs4_entry_t u_entry, void *key)
   2194 {
   2195 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2196 	rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
   2197 
   2198 	return (dsp->rds_client == kdsp->rds_client &&
   2199 	    dsp->rds_finfo == kdsp->rds_finfo);
   2200 }
   2201 
   2202 void *
   2203 deleg_mkkey(rfs4_entry_t u_entry)
   2204 {
   2205 	return (u_entry);
   2206 }
   2207 
   2208 uint32_t
   2209 deleg_state_hash(void *key)
   2210 {
   2211 	stateid_t *ip = (stateid_t *)key;
   2212 
   2213 	return (ip->v4_bits.state_ident);
   2214 }
   2215 
   2216 bool_t
   2217 deleg_state_compare(rfs4_entry_t u_entry, void *key)
   2218 {
   2219 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2220 	stateid_t *id = (stateid_t *)key;
   2221 	bool_t rc;
   2222 
   2223 	if (id->v4_bits.type != DELEGID)
   2224 		return (FALSE);
   2225 
   2226 	rc = (dsp->rds_delegid.v4_bits.boottime == id->v4_bits.boottime &&
   2227 	    dsp->rds_delegid.v4_bits.state_ident == id->v4_bits.state_ident);
   2228 
   2229 	return (rc);
   2230 }
   2231 
   2232 void *
   2233 deleg_state_mkkey(rfs4_entry_t u_entry)
   2234 {
   2235 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2236 
   2237 	return (&dsp->rds_delegid);
   2238 }
   2239 
   2240 bool_t
   2241 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
   2242 {
   2243 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2244 
   2245 	if (rfs4_dbe_is_invalid(dsp->rds_dbe))
   2246 		return (TRUE);
   2247 
   2248 	if ((gethrestime_sec() - dsp->rds_client->rc_last_access
   2249 	    > dbe_to_instp(dsp->rds_dbe)->lease_period)) {
   2250 		rfs4_dbe_invalidate(dsp->rds_dbe);
   2251 		return (TRUE);
   2252 	}
   2253 
   2254 	return (FALSE);
   2255 }
   2256 
   2257 bool_t
   2258 rfs4_deleg_state_create(rfs4_entry_t u_entry,
   2259 			void *argp)
   2260 {
   2261 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2262 	rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
   2263 	rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
   2264 
   2265 	rfs4_dbe_hold(fp->rf_dbe);
   2266 	rfs4_dbe_hold(cp->rc_dbe);
   2267 
   2268 	dsp->rds_delegid = get_stateid(dbe_to_instp(dsp->rds_dbe),
   2269 	    rfs4_dbe_getid(dsp->rds_dbe), DELEGID);
   2270 	dsp->rds_finfo = fp;
   2271 	dsp->rds_client = cp;
   2272 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
   2273 
   2274 	dsp->rds_time_granted = gethrestime_sec();	/* observability */
   2275 	dsp->rds_time_revoked = 0;
   2276 
   2277 	list_link_init(&dsp->rds_node);
   2278 
   2279 	/* cb race-detection support */
   2280 	dsp->rds_rs.refcnt = dsp->rds_rs.seqid = dsp->rds_rs.slotno = 0;
   2281 	bzero(&dsp->rds_rs.sessid, sizeof (sessionid4));
   2282 
   2283 	return (TRUE);
   2284 }
   2285 
   2286 void
   2287 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
   2288 {
   2289 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   2290 
   2291 	/* return delegation if necessary */
   2292 	rfs4_return_deleg(dsp, FALSE);
   2293 
   2294 	/* Were done with the file */
   2295 	rfs4_file_rele(dsp->rds_finfo);
   2296 	dsp->rds_finfo = NULL;
   2297 
   2298 	/* And now with the openowner */
   2299 	rfs4_client_rele(dsp->rds_client);
   2300 	dsp->rds_client = NULL;
   2301 }
   2302 
   2303 rfs4_deleg_state_t *
   2304 rfs4_finddeleg(struct compound_state *cs,
   2305 	rfs4_state_t *sp, bool_t *create)
   2306 {
   2307 	rfs4_deleg_state_t ds, *dsp;
   2308 
   2309 	ds.rds_client = sp->rs_owner->ro_client;
   2310 	ds.rds_finfo = sp->rs_finfo;
   2311 
   2312 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(cs->instp->deleg_idx, &ds,
   2313 	    create, &ds, RFS4_DBS_VALID);
   2314 
   2315 	return (dsp);
   2316 }
   2317 
   2318 rfs4_deleg_state_t *
   2319 rfs4_finddelegstate(struct compound_state *cs,
   2320 		    stateid_t *id)
   2321 {
   2322 	rfs4_deleg_state_t *dsp;
   2323 	bool_t create = FALSE;
   2324 
   2325 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(cs->instp->deleg_state_idx,
   2326 	    id, &create, NULL, RFS4_DBS_VALID);
   2327 
   2328 	return (dsp);
   2329 }
   2330 
   2331 void
   2332 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
   2333 {
   2334 	rfs4_dbe_rele(dsp->rds_dbe);
   2335 }
   2336 
   2337 /*
   2338  * XXX NFSv4.0 ONLY !!
   2339  */
   2340 void
   2341 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
   2342 {
   2343 
   2344 	rfs4_dbe_lock(lsp->rls_dbe);
   2345 
   2346 	/*
   2347 	 * If we are skipping sequence id checking, this means that
   2348 	 * this is the first lock request and therefore the sequence
   2349 	 * id does not need to be updated.  This only happens on the
   2350 	 * first lock request for a lockowner
   2351 	 */
   2352 	if (!lsp->rls_skip_seqid_check)
   2353 		lsp->rls_seqid++;
   2354 
   2355 	rfs4_dbe_unlock(lsp->rls_dbe);
   2356 }
   2357 
   2358 /*
   2359  * XXX NFSv4.0 ONLY !!
   2360  */
   2361 void
   2362 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
   2363 {
   2364 	ASSERT(!(dbe_to_instp(lsp->rls_dbe)->inst_flags & NFS_INST_v41));
   2365 
   2366 	rfs4_dbe_lock(lsp->rls_dbe);
   2367 
   2368 	rfs4_free_reply(&lsp->rls_reply);
   2369 
   2370 	rfs4_copy_reply(&lsp->rls_reply, resp);
   2371 
   2372 	rfs4_dbe_unlock(lsp->rls_dbe);
   2373 }
   2374 
   2375 void
   2376 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
   2377     bool_t close_of_client)
   2378 {
   2379 	rfs4_state_t *sp;
   2380 
   2381 	rfs4_dbe_lock(oo->ro_dbe);
   2382 
   2383 	for (sp = list_head(&oo->ro_statelist); sp != NULL;
   2384 	    sp = list_next(&oo->ro_statelist, sp)) {
   2385 		rfs4_state_close(sp, FALSE, close_of_client, CRED());
   2386 		if (invalidate == TRUE)
   2387 			rfs4_dbe_invalidate(sp->rs_dbe);
   2388 	}
   2389 
   2390 	rfs4_dbe_invalidate(oo->ro_dbe);
   2391 	rfs4_dbe_unlock(oo->ro_dbe);
   2392 }
   2393 
   2394 uint32_t
   2395 state_owner_file_hash(void *key)
   2396 {
   2397 	rfs4_state_t *sp = key;
   2398 
   2399 	return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
   2400 }
   2401 
   2402 bool_t
   2403 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
   2404 {
   2405 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2406 	rfs4_state_t *arg = key;
   2407 
   2408 	if (sp->rs_closed == TRUE)
   2409 		return (FALSE);
   2410 
   2411 	return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
   2412 }
   2413 
   2414 void *
   2415 state_owner_file_mkkey(rfs4_entry_t u_entry)
   2416 {
   2417 	return (u_entry);
   2418 }
   2419 
   2420 uint32_t
   2421 state_file_hash(void *key)
   2422 {
   2423 	return (ADDRHASH(key));
   2424 }
   2425 
   2426 bool_t
   2427 state_file_compare(rfs4_entry_t u_entry, void *key)
   2428 {
   2429 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2430 	rfs4_file_t *fp = key;
   2431 
   2432 	if (sp->rs_closed == TRUE)
   2433 		return (FALSE);
   2434 
   2435 	return (fp == sp->rs_finfo);
   2436 }
   2437 
   2438 void *
   2439 state_file_mkkey(rfs4_entry_t u_entry)
   2440 {
   2441 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2442 
   2443 	return (sp->rs_finfo);
   2444 }
   2445 
   2446 rfs4_state_t *
   2447 rfs4_findstate_by_owner_file(struct compound_state *cs,
   2448     rfs4_openowner_t *oo, rfs4_file_t *fp, bool_t *create)
   2449 {
   2450 	rfs4_state_t *sp;
   2451 	rfs4_state_t key;
   2452 
   2453 	key.rs_owner = oo;
   2454 	key.rs_finfo = fp;
   2455 
   2456 	sp = (rfs4_state_t *)rfs4_dbsearch(cs->instp->state_owner_file_idx,
   2457 	    &key, create, &key, RFS4_DBS_VALID);
   2458 
   2459 	return (sp);
   2460 }
   2461 
   2462 /*
   2463  * This returns ANY state struct that refers
   2464  * to this file.
   2465  */
   2466 static rfs4_state_t *
   2467 findstate_by_file(nfs_server_instance_t *instp, rfs4_file_t *fp)
   2468 {
   2469 	bool_t create = FALSE;
   2470 
   2471 	return ((rfs4_state_t *)rfs4_dbsearch(instp->state_file_idx, fp,
   2472 	    &create, fp, RFS4_DBS_VALID));
   2473 }
   2474 
   2475 bool_t
   2476 rfs4_state_expiry(rfs4_entry_t u_entry)
   2477 {
   2478 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2479 	time_t lease;
   2480 
   2481 	if (rfs4_dbe_is_invalid(sp->rs_dbe))
   2482 		return (TRUE);
   2483 
   2484 	lease = dbe_to_instp(sp->rs_dbe)->lease_period;
   2485 
   2486 	if (sp->rs_closed == TRUE &&
   2487 	    ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
   2488 	    > lease))
   2489 		return (TRUE);
   2490 
   2491 	return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
   2492 	    > lease));
   2493 }
   2494 
   2495 bool_t
   2496 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
   2497 {
   2498 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   2499 	rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
   2500 	rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
   2501 
   2502 	rfs4_dbe_hold(fp->rf_dbe);
   2503 	rfs4_dbe_hold(oo->ro_dbe);
   2504 	sp->rs_stateid = get_stateid(dbe_to_instp(sp->rs_dbe),
   2505 	    rfs4_dbe_getid(sp->rs_dbe), OPENID);
   2506 	sp->rs_owner = oo;
   2507 	sp->rs_finfo = fp;
   2508 
   2509 	list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
   2510 	    offsetof(rfs4_lo_state_t, rls_node));
   2511 
   2512 	/* Insert state on per open owner's list */
   2513 	rfs4_dbe_lock(oo->ro_dbe);
   2514 	list_insert_tail(&oo->ro_statelist, sp);
   2515 	rfs4_dbe_unlock(oo->ro_dbe);
   2516 
   2517 	return (TRUE);
   2518 }
   2519 
   2520 rfs4_state_t *
   2521 rfs4_findstate(struct compound_state *cs, stateid_t *id,
   2522     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
   2523 {
   2524 	rfs4_state_t *sp;
   2525 	bool_t create = FALSE;
   2526 
   2527 	sp = (rfs4_state_t *)rfs4_dbsearch(cs->instp->state_idx, id,
   2528 	    &create, NULL, find_invalid);
   2529 	if (lock_fp == TRUE && sp != NULL)
   2530 		rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
   2531 
   2532 	return (sp);
   2533 }
   2534 
   2535 void
   2536 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
   2537     cred_t *cr)
   2538 {
   2539 	/* Remove the associated lo_state owners */
   2540 	if (!lock_held)
   2541 		rfs4_dbe_lock(sp->rs_dbe);
   2542 
   2543 	/*
   2544 	 * If refcnt == 0, the dbe is about to be destroyed.
   2545 	 * lock state will be released by the reaper thread.
   2546 	 */
   2547 
   2548 	if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
   2549 		if (sp->rs_closed == FALSE) {
   2550 			rfs4_release_share_lock_state(sp, cr, close_of_client);
   2551 			sp->rs_closed = TRUE;
   2552 		}
   2553 	}
   2554 
   2555 	if (!lock_held)
   2556 		rfs4_dbe_unlock(sp->rs_dbe);
   2557 }
   2558 
   2559 /*
   2560  * Remove all state associated with the given client.
   2561  */
   2562 void
   2563 rfs4_client_state_remove(rfs4_client_t *cp)
   2564 {
   2565 	rfs4_openowner_t *oo;
   2566 
   2567 	rfs4_dbe_lock(cp->rc_dbe);
   2568 
   2569 	for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
   2570 	    oo = list_next(&cp->rc_openownerlist, oo)) {
   2571 		rfs4_free_opens(oo, TRUE, TRUE);
   2572 	}
   2573 
   2574 	rfs4_dbe_unlock(cp->rc_dbe);
   2575 }
   2576 
   2577 void
   2578 rfs4_client_close(rfs4_client_t *cp)
   2579 {
   2580 	/* Mark client as going away. */
   2581 	rfs4_dbe_lock(cp->rc_dbe);
   2582 	rfs4_dbe_invalidate(cp->rc_dbe);
   2583 	rfs4_dbe_unlock(cp->rc_dbe);
   2584 
   2585 	rfs4_free_cred_princ(cp);
   2586 	rfs4_client_state_remove(cp);
   2587 
   2588 	/* Release the client */
   2589 	rfs4_client_rele(cp);
   2590 }
   2591 
   2592 nfsstat4
   2593 get_clientid_err(nfs_server_instance_t *instp,
   2594 		clientid4 *cp, int setclid_confirm)
   2595 {
   2596 	cid *cidp = (cid *) cp;
   2597 
   2598 	/*
   2599 	 * If we are booted as a cluster node, check the embedded nodeid.
   2600 	 * If it indicates that this clientid was generated on another node,
   2601 	 * inform the client accordingly.
   2602 	 */
   2603 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
   2604 		return (NFS4ERR_STALE_CLIENTID);
   2605 
   2606 	/*
   2607 	 * If the server start time matches the time provided
   2608 	 * by the client (via the clientid) and this is NOT a
   2609 	 * setclientid_confirm then return EXPIRED.
   2610 	 */
   2611 	if (!setclid_confirm && cidp->impl_id.start_time == instp->start_time)
   2612 		return (NFS4ERR_EXPIRED);
   2613 
   2614 	return (NFS4ERR_STALE_CLIENTID);
   2615 }
   2616 
   2617 
   2618 nfsstat4
   2619 rfs4_check_clientid(nfs_server_instance_t *instp, clientid4 *cp)
   2620 {
   2621 	cid *cidp = (cid *) cp;
   2622 
   2623 	/*
   2624 	 * If we are booted as a cluster node, check the embedded nodeid.
   2625 	 * If it indicates that this clientid was generated on another node,
   2626 	 * inform the client accordingly.
   2627 	 */
   2628 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
   2629 		return (NFS4ERR_STALE_CLIENTID);
   2630 
   2631 	/*
   2632 	 * If the server start time matches the time provided
   2633 	 * by the client (via the clientid) and this is NOT a
   2634 	 * setclientid_confirm then return EXPIRED.
   2635 	 */
   2636 	if (cidp->impl_id.start_time == instp->start_time)
   2637 		return (NFS4ERR_EXPIRED);
   2638 
   2639 	return (NFS4ERR_STALE_CLIENTID);
   2640 }
   2641 
   2642 
   2643 /*
   2644  * This is used when a stateid has not been found amongst the
   2645  * current server's state.  Check the stateid to see if it
   2646  * was from this server instantiation or not.
   2647  */
   2648 static nfsstat4
   2649 what_stateid_error(struct compound_state *cs,
   2650 		stateid_t *id, stateid_type_t type)
   2651 {
   2652 	/* If we are booted as a cluster node, was stateid locally generated? */
   2653 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
   2654 		return (NFS4ERR_STALE_STATEID);
   2655 
   2656 	/* If types don't match then no use checking further */
   2657 	if (type != id->v4_bits.type)
   2658 		return (NFS4ERR_BAD_STATEID);
   2659 
   2660 	/* From a previous server instantiation, return STALE */
   2661 	if (id->v4_bits.boottime < cs->instp->start_time)
   2662 		return (NFS4ERR_STALE_STATEID);
   2663 
   2664 	/*
   2665 	 * From this server but the state is most likely beyond lease
   2666 	 * timeout: return NFS4ERR_EXPIRED.  However, there is the
   2667 	 * case of a delegation stateid.  For delegations, there is a
   2668 	 * case where the state can be removed without the client's
   2669 	 * knowledge/consent: revocation.  In the case of delegation
   2670 	 * revocation, the delegation state will be removed and will
   2671 	 * not be found.  If the client does something like a
   2672 	 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
   2673 	 * that has been revoked, the server should return BAD_STATEID
   2674 	 * instead of the more common EXPIRED error.
   2675 	 */
   2676 	if (id->v4_bits.boottime == cs->instp->start_time) {
   2677 		if (type == DELEGID)
   2678 			return (NFS4ERR_BAD_STATEID);
   2679 		else
   2680 			return (NFS4ERR_EXPIRED);
   2681 	}
   2682 
   2683 	return (NFS4ERR_BAD_STATEID);
   2684 }
   2685 
   2686 /*
   2687  * Used later on to find the various state structs.  When called from
   2688  * check_stateid()->rfs4_get_all_state(), no file struct lock is
   2689  * taken (it is not needed) and helps on the read/write path with
   2690  * respect to performance.
   2691  */
   2692 static nfsstat4
   2693 rfs4_get_state_lockit(struct compound_state *cs, stateid4 *stateid,
   2694     rfs4_state_t **spp, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
   2695 {
   2696 	stateid_t *id = (stateid_t *)stateid;
   2697 	rfs4_state_t *sp;
   2698 
   2699 	*spp = NULL;
   2700 
   2701 	/* If we are booted as a cluster node, was stateid locally generated? */
   2702 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
   2703 		return (NFS4ERR_STALE_STATEID);
   2704 
   2705 	sp = rfs4_findstate(cs, id, find_invalid, lock_fp);
   2706 	if (sp == NULL) {
   2707 		return (what_stateid_error(cs, id, OPENID));
   2708 	}
   2709 
   2710 	if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
   2711 		if (lock_fp == TRUE)
   2712 			rfs4_state_rele(sp);
   2713 		else
   2714 			rfs4_state_rele_nounlock(sp);
   2715 		return (NFS4ERR_EXPIRED);
   2716 	}
   2717 
   2718 	*spp = sp;
   2719 
   2720 	return (NFS4_OK);
   2721 }
   2722 
   2723 nfsstat4
   2724 rfs4_get_state(struct compound_state *cs, stateid4 *stateid,
   2725     rfs4_state_t **spp, rfs4_dbsearch_type_t find_invalid)
   2726 {
   2727 	return (rfs4_get_state_lockit(cs, stateid, spp, find_invalid, TRUE));
   2728 }
   2729 
   2730 int
   2731 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
   2732 {
   2733 	stateid_t *id = (stateid_t *)stateid;
   2734 
   2735 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
   2736 		return (NFS4_CHECK_STATEID_EXPIRED);
   2737 
   2738 	/* Stateid is some time in the future - that's bad */
   2739 	if (sp->rs_stateid.v4_bits.chgseq < id->v4_bits.chgseq)
   2740 		return (NFS4_CHECK_STATEID_BAD);
   2741 
   2742 	if (sp->rs_stateid.v4_bits.chgseq == id->v4_bits.chgseq + 1)
   2743 		return (NFS4_CHECK_STATEID_REPLAY);
   2744 
   2745 	/* Stateid is some time in the past - that's old */
   2746 	if (sp->rs_stateid.v4_bits.chgseq > id->v4_bits.chgseq)
   2747 		return (NFS4_CHECK_STATEID_OLD);
   2748 
   2749 	/* Caller needs to know about confirmation before closure */
   2750 	if (sp->rs_owner->ro_need_confirm)
   2751 		return (NFS4_CHECK_STATEID_UNCONFIRMED);
   2752 
   2753 	if (sp->rs_closed == TRUE)
   2754 		return (NFS4_CHECK_STATEID_CLOSED);
   2755 
   2756 	return (NFS4_CHECK_STATEID_OKAY);
   2757 }
   2758 
   2759 int
   2760 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
   2761 {
   2762 	stateid_t *id = (stateid_t *)stateid;
   2763 
   2764 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
   2765 		return (NFS4_CHECK_STATEID_EXPIRED);
   2766 
   2767 	/* Stateid is some time in the future - that's bad */
   2768 	if (lsp->rls_lockid.v4_bits.chgseq < id->v4_bits.chgseq)
   2769 		return (NFS4_CHECK_STATEID_BAD);
   2770 
   2771 	if (lsp->rls_lockid.v4_bits.chgseq == id->v4_bits.chgseq + 1)
   2772 		return (NFS4_CHECK_STATEID_REPLAY);
   2773 
   2774 	/* Stateid is some time in the past - that's old */
   2775 	if (lsp->rls_lockid.v4_bits.chgseq > id->v4_bits.chgseq)
   2776 		return (NFS4_CHECK_STATEID_OLD);
   2777 
   2778 	if (lsp->rls_state->rs_closed == TRUE)
   2779 		return (NFS4_CHECK_STATEID_CLOSED);
   2780 
   2781 	return (NFS4_CHECK_STATEID_OKAY);
   2782 }
   2783 
   2784 nfsstat4
   2785 rfs4_get_deleg_state(struct compound_state *cs,
   2786 		stateid4 *stateid, rfs4_deleg_state_t **dspp)
   2787 {
   2788 	stateid_t *id = (stateid_t *)stateid;
   2789 	rfs4_deleg_state_t *dsp;
   2790 
   2791 	*dspp = NULL;
   2792 
   2793 	/* If we are booted as a cluster node, was stateid locally generated? */
   2794 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
   2795 		return (NFS4ERR_STALE_STATEID);
   2796 
   2797 	dsp = rfs4_finddelegstate(cs, id);
   2798 	if (dsp == NULL) {
   2799 		return (what_stateid_error(cs, id, DELEGID));
   2800 	}
   2801 
   2802 	if (rfs4_lease_expired(dsp->rds_client)) {
   2803 		rfs4_deleg_state_rele(dsp);
   2804 		return (NFS4ERR_EXPIRED);
   2805 	}
   2806 
   2807 	*dspp = dsp;
   2808 
   2809 	return (NFS4_OK);
   2810 }
   2811 
   2812 nfsstat4
   2813 rfs4_get_lo_state(struct compound_state *cs,
   2814 		stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
   2815 {
   2816 	stateid_t *id = (stateid_t *)stateid;
   2817 	rfs4_lo_state_t *lsp;
   2818 
   2819 	*lspp = NULL;
   2820 
   2821 	/* If we are booted as a cluster node, was stateid locally generated? */
   2822 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
   2823 		return (NFS4ERR_STALE_STATEID);
   2824 
   2825 	lsp = rfs4_findlo_state(cs, id, lock_fp);
   2826 	if (lsp == NULL) {
   2827 		return (what_stateid_error(cs, id, LOCKID));
   2828 	}
   2829 
   2830 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
   2831 		rfs4_lo_state_rele(lsp, lock_fp);
   2832 		return (NFS4ERR_EXPIRED);
   2833 	}
   2834 
   2835 	*lspp = lsp;
   2836 
   2837 	return (NFS4_OK);
   2838 }
   2839 
   2840 /* v4.0 only */
   2841 nfsstat4
   2842 rfs4_get_all_state(struct compound_state *cs, stateid4 *sid,
   2843     rfs4_state_t **spp, rfs4_deleg_state_t **dspp,
   2844     rfs4_lo_state_t **lospp)
   2845 {
   2846 	rfs4_state_t *sp = NULL;
   2847 	rfs4_deleg_state_t *dsp = NULL;
   2848 	rfs4_lo_state_t *lsp = NULL;
   2849 	stateid_t *id;
   2850 	nfsstat4 status;
   2851 
   2852 	*spp = NULL; *dspp = NULL; *lospp = NULL;
   2853 
   2854 	id = (stateid_t *)sid;
   2855 	switch (id->v4_bits.type) {
   2856 	case OPENID:
   2857 		status = rfs4_get_state_lockit(cs, sid,
   2858 		    &sp, RFS4_DBS_VALID, FALSE);
   2859 		break;
   2860 	case DELEGID:
   2861 		status = rfs4_get_deleg_state(cs, sid, &dsp);
   2862 		break;
   2863 	case LOCKID:
   2864 		/*
   2865 		 * NB: If this was a lock stateid we return to the caller
   2866 		 * the lock state via lospp and the associated open stateid
   2867 		 * that established the lock state in spp.
   2868 		 */
   2869 		status = rfs4_get_lo_state(cs, sid, &lsp, FALSE);
   2870 		if (status == NFS4_OK) {
   2871 			sp = lsp->rls_state;
   2872 			rfs4_dbe_hold(sp->rs_dbe);
   2873 		}
   2874 		break;
   2875 	default:
   2876 		status = NFS4ERR_BAD_STATEID;
   2877 	}
   2878 
   2879 	if (status == NFS4_OK) {
   2880 		*spp = sp;
   2881 		*dspp = dsp;
   2882 		*lospp = lsp;
   2883 	}
   2884 
   2885 	return (status);
   2886 }
   2887 
   2888 /* ARGSUSED */
   2889 nfsstat4
   2890 mds_validate_logstateid(struct compound_state *cs, stateid_t *sid)
   2891 {
   2892 	nfsstat4 status;
   2893 	stateid4 *id = (stateid4 *)sid;
   2894 	rfs4_deleg_state_t *dsp;
   2895 	rfs4_state_t *sp;
   2896 	rfs4_lo_state_t *lsp;
   2897 
   2898 	switch (sid->v4_bits.type) {
   2899 	case DELEGID:
   2900 		status = rfs4_get_deleg_state(cs, id, &dsp);
   2901 		if (status != NFS4_OK)
   2902 			break;
   2903 
   2904 		/* Is associated server instance in its grace period? */
   2905 		if (rfs4_clnt_in_grace(dsp->rds_client)) {
   2906 			rfs4_deleg_state_rele(dsp);
   2907 			return (NFS4ERR_GRACE);
   2908 		}
   2909 		if (dsp->rds_delegid.v4_bits.chgseq != sid->v4_bits.chgseq) {
   2910 			rfs4_deleg_state_rele(dsp);
   2911 			return (NFS4ERR_BAD_STATEID);
   2912 		}
   2913 		/* Ensure specified filehandle matches */
   2914 		if (dsp->rds_finfo->rf_vp != cs->vp) {
   2915 			rfs4_deleg_state_rele(dsp);
   2916 			return (NFS4ERR_BAD_STATEID);
   2917 		}
   2918 
   2919 		rfs4_deleg_state_rele(dsp);
   2920 		break;
   2921 	case OPENID:
   2922 		status = rfs4_get_state_lockit(cs, id,
   2923 		    &sp, RFS4_DBS_VALID, FALSE);
   2924 		if (status != NFS4_OK)
   2925 			return (status);
   2926 
   2927 		/* Is associated server instance in its grace period? */
   2928 		if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
   2929 			rfs4_state_rele_nounlock(sp);
   2930 			return (NFS4ERR_GRACE);
   2931 		}
   2932 		/* Seqid in the future? - that's bad */
   2933 		if (sp->rs_stateid.v4_bits.chgseq < sid->v4_bits.chgseq) {
   2934 			rfs4_state_rele_nounlock(sp);
   2935 			return (NFS4ERR_BAD_STATEID);
   2936 		}
   2937 		/* Seqid in the past - that's old */
   2938 		if (sp->rs_stateid.v4_bits.chgseq > sid->v4_bits.chgseq) {
   2939 			rfs4_state_rele_nounlock(sp);
   2940 			return (NFS4ERR_OLD_STATEID);
   2941 		}
   2942 		/* Ensure specified filehandle matches */
   2943 		if (sp->rs_finfo->rf_vp != cs->vp) {
   2944 			rfs4_state_rele_nounlock(sp);
   2945 			return (NFS4ERR_BAD_STATEID);
   2946 		}
   2947 		if (sp->rs_owner->ro_need_confirm) {
   2948 			rfs4_state_rele_nounlock(sp);
   2949 			return (NFS4ERR_BAD_STATEID);
   2950 		}
   2951 		if (sp->rs_closed == TRUE) {
   2952 			rfs4_state_rele_nounlock(sp);
   2953 			return (NFS4ERR_OLD_STATEID);
   2954 		}
   2955 
   2956 		rfs4_state_rele_nounlock(sp);
   2957 		break;
   2958 	case LOCKID:
   2959 		status = rfs4_get_lo_state(cs, id, &lsp, FALSE);
   2960 		if (status != NFS4_OK)
   2961 			return (status);
   2962 
   2963 		/* Is associated server instance in its grace period? */
   2964 		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
   2965 			rfs4_lo_state_rele(lsp, FALSE);
   2966 			return (NFS4ERR_GRACE);
   2967 		}
   2968 		/* Seqid in the future? - that's bad */
   2969 		if (lsp->rls_lockid.v4_bits.chgseq < sid->v4_bits.chgseq) {
   2970 			rfs4_lo_state_rele(lsp, FALSE);
   2971 			return (NFS4ERR_BAD_STATEID);
   2972 		}
   2973 		/* Seqid in the past? - that's old */
   2974 		if (lsp->rls_lockid.v4_bits.chgseq > sid->v4_bits.chgseq) {
   2975 			rfs4_lo_state_rele(lsp, FALSE);
   2976 			return (NFS4ERR_OLD_STATEID);
   2977 		}
   2978 		/* Ensure specified filehandle matches */
   2979 		if (lsp->rls_state->rs_finfo->rf_vp != cs->vp) {
   2980 			rfs4_lo_state_rele(lsp, FALSE);
   2981 			return (NFS4ERR_BAD_STATEID);
   2982 		}
   2983 		rfs4_lo_state_rele(lsp, FALSE);
   2984 		break;
   2985 	default:
   2986 		status = NFS4ERR_BAD_STATEID;
   2987 	}
   2988 
   2989 	return (status);
   2990 }
   2991 
   2992 /*
   2993  * Given the I/O mode (FREAD or FWRITE), this checks whether the
   2994  * rfs4_state_t struct has access to do this operation and if so
   2995  * return NFS4_OK; otherwise the proper NFSv4 error is returned.
   2996  */
   2997 nfsstat4
   2998 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
   2999 {
   3000 	nfsstat4 stat = NFS4_OK;
   3001 	rfs4_file_t *fp;
   3002 	bool_t create = FALSE;
   3003 
   3004 	rfs4_dbe_lock(sp->rs_dbe);
   3005 	if (mode == FWRITE) {
   3006 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
   3007 			stat = NFS4ERR_OPENMODE;
   3008 		}
   3009 	} else if (mode == FREAD) {
   3010 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
   3011 			/*
   3012 			 * If we have OPENed the file with DENYing access
   3013 			 * to both READ and WRITE then no one else could
   3014 			 * have OPENed the file, hence no conflicting READ
   3015 			 * deny.  This check is merely an optimization.
   3016 			 */
   3017 			if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
   3018 				goto out;
   3019 
   3020 			/* Check against file struct's DENY mode */
   3021 			fp = rfs4_findfile(dbe_to_instp(sp->rs_dbe),
   3022 			    vp, NULL, &create);
   3023 			if (fp != NULL) {
   3024 				int deny_read = 0;
   3025 				rfs4_dbe_lock(fp->rf_dbe);
   3026 				/*
   3027 				 * Check if any other open owner has the file
   3028 				 * OPENed with deny READ.
   3029 				 */
   3030 				if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
   3031 					deny_read = 1;
   3032 				ASSERT(fp->rf_deny_read - deny_read >= 0);
   3033 				if (fp->rf_deny_read - deny_read > 0)
   3034 					stat = NFS4ERR_OPENMODE;
   3035 				rfs4_dbe_unlock(fp->rf_dbe);
   3036 				rfs4_file_rele(fp);
   3037 			}
   3038 		}
   3039 	} else {
   3040 		/* Illegal I/O mode */
   3041 		stat = NFS4ERR_INVAL;
   3042 	}
   3043 out:
   3044 	rfs4_dbe_unlock(sp->rs_dbe);
   3045 	return (stat);
   3046 }
   3047 
   3048 /*
   3049  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
   3050  * the file is being truncated, return NFS4_OK if allowed or appropriate
   3051  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
   3052  * the associated file will be done if the I/O is not consistent with any
   3053  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
   3054  * as reader or writer as appropriate. rfs4_op_open will acquire the
   3055  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
   3056  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
   3057  * deleg parameter, we will return whether a write delegation is held by
   3058  * the client associated with this stateid.
   3059  * If the server instance associated with the relevant client is in its
   3060  * grace period, return NFS4ERR_GRACE.
   3061  */
   3062 
   3063 nfsstat4
   3064 check_stateid(int mode, struct compound_state *cs, vnode_t *vp,
   3065     stateid4 *stateid, bool_t trunc, bool_t *deleg, bool_t do_access,
   3066     caller_context_t *ct, clientid4 *cid)
   3067 {
   3068 	rfs4_file_t *fp;
   3069 	bool_t create = FALSE;
   3070 	rfs4_state_t *sp;
   3071 	rfs4_deleg_state_t *dsp;
   3072 	rfs4_lo_state_t *lsp;
   3073 	stateid_t *id = (stateid_t *)stateid;
   3074 	nfsstat4 stat = NFS4_OK;
   3075 
   3076 	if (ct != NULL) {
   3077 		ct->cc_sysid = 0;
   3078 		ct->cc_pid = 0;
   3079 		ct->cc_caller_id = cs->instp->caller_id;
   3080 		ct->cc_flags = CC_DONTBLOCK;
   3081 	}
   3082 
   3083 	if (ISSPECIAL(stateid)) {
   3084 		fp = rfs4_findfile(cs->instp, vp, NULL, &create);
   3085 		if (fp == NULL)
   3086 			return (NFS4_OK);
   3087 		if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) {
   3088 			rfs4_file_rele(fp);
   3089 			return (NFS4_OK);
   3090 		}
   3091 		if (mode == FWRITE ||
   3092 		    fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) {
   3093 			rfs4_recall_deleg(fp, trunc, NULL);
   3094 			rfs4_file_rele(fp);
   3095 			return (NFS4ERR_DELAY);
   3096 		}
   3097 		rfs4_file_rele(fp);
   3098 		return (NFS4_OK);
   3099 	}
   3100 
   3101 	stat = rfs4_get_all_state(cs, stateid, &sp, &dsp, &lsp);
   3102 	if (stat != NFS4_OK)
   3103 		return (stat);
   3104 
   3105 	/*
   3106 	 * Ordering of the following 'if' statements is specific
   3107 	 * since rfs4_get_all_state() may return a value for sp and
   3108 	 * lsp. First we check lsp, then 'fall' through to sp.
   3109 	 */
   3110 	if (lsp != NULL) {
   3111 		if (cid) {
   3112 			*cid = lsp->rls_locker->rl_client->rc_clientid;
   3113 		}
   3114 		/* Is associated server instance in its grace period? */
   3115 		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
   3116 			if (ct != NULL) {
   3117 				ct->cc_sysid =
   3118 				    lsp->rls_locker->rl_client->rc_sysidt;
   3119 				ct->cc_pid = lsp->rls_locker->rl_pid;
   3120 			}
   3121 			rfs4_lo_state_rele(lsp, FALSE);
   3122 			if (sp != NULL)
   3123 				rfs4_state_rele_nounlock(sp);
   3124 			return (NFS4ERR_GRACE);
   3125 		}
   3126 		/* Seqid in the future? - that's bad */
   3127 		if (lsp->rls_lockid.v4_bits.chgseq <
   3128 		    id->v4_bits.chgseq) {
   3129 			rfs4_lo_state_rele(lsp, FALSE);
   3130 			if (sp != NULL)
   3131 				rfs4_state_rele_nounlock(sp);
   3132 			return (NFS4ERR_BAD_STATEID);
   3133 		}
   3134 		/* Seqid in the past? - that's old */
   3135 		if (lsp->rls_lockid.v4_bits.chgseq >
   3136 		    id->v4_bits.chgseq) {
   3137 			rfs4_lo_state_rele(lsp, FALSE);
   3138 			if (sp != NULL)
   3139 				rfs4_state_rele_nounlock(sp);
   3140 			return (NFS4ERR_OLD_STATEID);
   3141 		}
   3142 		/* Ensure specified filehandle matches */
   3143 		if (lsp->rls_state->rs_finfo->rf_vp != vp) {
   3144 			rfs4_lo_state_rele(lsp, FALSE);
   3145 			if (sp != NULL)
   3146 				rfs4_state_rele_nounlock(sp);
   3147 			return (NFS4ERR_BAD_STATEID);
   3148 		}
   3149 		rfs4_lo_state_rele(lsp, FALSE);
   3150 	}
   3151 
   3152 	/*
   3153 	 * Stateid provided was an "open" or via the lock stateid
   3154 	 */
   3155 	if (sp != NULL) {
   3156 		/*
   3157 		 * only check if the passed in stateid was an OPENID,
   3158 		 * ie. Skip if we got here via the LOCKID.
   3159 		 */
   3160 		if (id->v4_bits.type == OPENID) {
   3161 			if (cid) {
   3162 				rfs4_dbe_lock(sp->rs_owner->ro_client->rc_dbe);
   3163 				*cid = sp->rs_owner->ro_client->rc_clientid;
   3164 				rfs4_dbe_unlock(sp->rs_owner->
   3165 				    ro_client->rc_dbe);
   3166 			}
   3167 			/* Is associated server instance in its grace period? */
   3168 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
   3169 				rfs4_state_rele_nounlock(sp);
   3170 				return (NFS4ERR_GRACE);
   3171 			}
   3172 			/* Seqid in the future? - that's bad */
   3173 			if (sp->rs_stateid.v4_bits.chgseq <
   3174 			    id->v4_bits.chgseq) {
   3175 				rfs4_state_rele_nounlock(sp);
   3176 				return (NFS4ERR_BAD_STATEID);
   3177 			}
   3178 			/* Seqid in the past - that's old */
   3179 			if (sp->rs_stateid.v4_bits.chgseq >
   3180 			    id->v4_bits.chgseq) {
   3181 				rfs4_state_rele_nounlock(sp);
   3182 				return (NFS4ERR_OLD_STATEID);
   3183 			}
   3184 			/* Ensure specified filehandle matches */
   3185 			if (sp->rs_finfo->rf_vp != vp) {
   3186 				rfs4_state_rele_nounlock(sp);
   3187 				return (NFS4ERR_BAD_STATEID);
   3188 			}
   3189 		}
   3190 		if (sp->rs_owner->ro_need_confirm) {
   3191 			rfs4_state_rele_nounlock(sp);
   3192 			return (NFS4ERR_BAD_STATEID);
   3193 		}
   3194 
   3195 		if (sp->rs_closed == TRUE) {
   3196 			rfs4_state_rele_nounlock(sp);
   3197 			return (NFS4ERR_OLD_STATEID);
   3198 		}
   3199 
   3200 		if (do_access)
   3201 			stat = rfs4_state_has_access(sp, mode, vp);
   3202 		else
   3203 			stat = NFS4_OK;
   3204 
   3205 		/*
   3206 		 * Return whether this state has write
   3207 		 * delegation if desired
   3208 		 */
   3209 		if (deleg &&
   3210 		    (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
   3211 			*deleg = TRUE;
   3212 
   3213 		/*
   3214 		 * We got a valid stateid, so we update the
   3215 		 * lease on the client. Ideally we would like
   3216 		 * to do this after the calling op succeeds,
   3217 		 * but for now this will be good
   3218 		 * enough. Callers of this routine are
   3219 		 * currently insulated from the state stuff.
   3220 		 */
   3221 		rfs4_update_lease(sp->rs_owner->ro_client);
   3222 
   3223 		/*
   3224 		 * If a delegation is present on this file and
   3225 		 * this is a WRITE, then update the lastwrite
   3226 		 * time to indicate that activity is present.
   3227 		 */
   3228 		if (sp->rs_finfo->rf_dinfo->rd_dtype ==
   3229 		    OPEN_DELEGATE_WRITE && mode == FWRITE) {
   3230 			sp->rs_finfo->rf_dinfo->rd_time_lastwrite =
   3231 			    gethrestime_sec();
   3232 		}
   3233 
   3234 		rfs4_state_rele_nounlock(sp);
   3235 		return (stat);
   3236 	}
   3237 
   3238 	if (dsp != NULL) {
   3239 		if (cid) {
   3240 			rfs4_dbe_lock(dsp->rds_client->rc_dbe);
   3241 			*cid = dsp->rds_client->rc_clientid;
   3242 			rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
   3243 		}
   3244 		/* Is associated server instance in its grace period? */
   3245 		if (rfs4_clnt_in_grace(dsp->rds_client)) {
   3246 			rfs4_deleg_state_rele(dsp);
   3247 			return (NFS4ERR_GRACE);
   3248 		}
   3249 		if (dsp->rds_delegid.v4_bits.chgseq != id->v4_bits.chgseq) {
   3250 			rfs4_deleg_state_rele(dsp);
   3251 			return (NFS4ERR_BAD_STATEID);
   3252 		}
   3253 
   3254 		/* Ensure specified filehandle matches */
   3255 		if (dsp->rds_finfo->rf_vp != vp) {
   3256 			rfs4_deleg_state_rele(dsp);
   3257 			return (NFS4ERR_BAD_STATEID);
   3258 		}
   3259 		/*
   3260 		 * Return whether this state has write
   3261 		 * delegation if desired
   3262 		 */
   3263 		if (deleg && (dsp->rds_finfo->rf_dinfo->rd_dtype ==
   3264 		    OPEN_DELEGATE_WRITE))
   3265 			*deleg = TRUE;
   3266 
   3267 		rfs4_update_lease(dsp->rds_client);
   3268 
   3269 		/*
   3270 		 * If a delegation is present on this file and
   3271 		 * this is a WRITE, then update the lastwrite
   3272 		 * time to indicate that activity is present.
   3273 		 */
   3274 		if (dsp->rds_finfo->rf_dinfo->rd_dtype ==
   3275 		    OPEN_DELEGATE_WRITE && mode == FWRITE) {
   3276 			dsp->rds_finfo->rf_dinfo->rd_time_lastwrite =
   3277 			    gethrestime_sec();
   3278 		}
   3279 
   3280 		/*
   3281 		 * XXX - what happens if this is a WRITE and the
   3282 		 * delegation type of for READ.
   3283 		 */
   3284 		rfs4_deleg_state_rele(dsp);
   3285 
   3286 		return (stat);
   3287 	}
   3288 	/*
   3289 	 * If we got this far, something bad happened
   3290 	 */
   3291 	return (NFS4ERR_BAD_STATEID);
   3292 }
   3293 
   3294 
   3295 /*
   3296  * This is a special function in that for the file struct provided the
   3297  * server wants to remove/close all current state associated with the
   3298  * file.  The prime use of this would be with OP_REMOVE to force the
   3299  * release of state and particularly of file locks.
   3300  *
   3301  * There is an assumption that there is no delegations outstanding on
   3302  * this file at this point.  The caller should have waited for those
   3303  * to be returned or revoked.
   3304  */
   3305 void
   3306 rfs4_close_all_state(rfs4_file_t *fp)
   3307 {
   3308 	nfs_server_instance_t *instp;
   3309 	rfs4_state_t *sp;
   3310 
   3311 	rfs4_dbe_lock(fp->rf_dbe);
   3312 
   3313 	/* No delegations for this file */
   3314 	ASSERT(list_is_empty(&fp->rf_delegstatelist));
   3315 
   3316 	/* Make sure that it can not be found */
   3317 	rfs4_dbe_invalidate(fp->rf_dbe);
   3318 
   3319 	if (fp->rf_vp == NULL) {
   3320 		rfs4_dbe_unlock(fp->rf_dbe);
   3321 		return;
   3322 	}
   3323 	rfs4_dbe_unlock(fp->rf_dbe);
   3324 
   3325 	instp = dbe_to_instp(fp->rf_dbe);
   3326 
   3327 	/*
   3328 	 * Hold as writer to prevent other server threads from
   3329 	 * processing requests related to the file while all state is
   3330 	 * being removed.
   3331 	 */
   3332 	rw_enter(&fp->rf_file_rwlock, RW_WRITER);
   3333 
   3334 	/* Remove ALL state from the file */
   3335 	while (sp = findstate_by_file(instp, fp)) {
   3336 		rfs4_state_close(sp, FALSE, FALSE, CRED());
   3337 		rfs4_state_rele_nounlock(sp);
   3338 	}
   3339 
   3340 	/*
   3341 	 * This is only safe since there are no further references to
   3342 	 * the file.
   3343 	 */
   3344 	rfs4_dbe_lock(fp->rf_dbe);
   3345 	if (fp->rf_vp) {
   3346 		vnode_t *vp = fp->rf_vp;
   3347 		nfs_server_instance_t *instp; /* XXX: shadows above */
   3348 
   3349 		instp = dbe_to_instp(fp->rf_dbe);
   3350 		mutex_enter(&vp->v_vsd_lock);
   3351 		(void) vsd_set(vp, instp->vkey, NULL);
   3352 		mutex_exit(&vp->v_vsd_lock);
   3353 		VN_RELE(vp);
   3354 		fp->rf_vp = NULL;
   3355 	}
   3356 	rfs4_dbe_unlock(fp->rf_dbe);
   3357 
   3358 	/* Finally let other references to proceed */
   3359 	rw_exit(&fp->rf_file_rwlock);
   3360 }
   3361 
   3362 /*
   3363  * This function is used as a target for the rfs4_dbe_walk() call
   3364  * below.  The purpose of this function is to see if the
   3365  * lockowner_state refers to a file that resides within the exportinfo
   3366  * export.  If so, then remove the lock_owner state (file locks and
   3367  * share "locks") for this object since the intent is the server is
   3368  * unexporting the specified directory.  Be sure to invalidate the
   3369  * object after the state has been released
   3370  */
   3371 static void
   3372 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
   3373 {
   3374 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
   3375 	struct exportinfo *exi = (struct exportinfo *)e;
   3376 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
   3377 	fhandle_t *efhp;
   3378 
   3379 	efhp = (fhandle_t *)&exi->exi_fh;
   3380 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
   3381 
   3382 	FH_TO_FMT4(efhp, exi_fhp);
   3383 
   3384 	finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
   3385 	    rf_filehandle.nfs_fh4_val;
   3386 
   3387 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
   3388 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
   3389 	    exi_fhp->fh4_xlen) == 0) {
   3390 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
   3391 		rfs4_dbe_invalidate(lsp->rls_dbe);
   3392 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
   3393 	}
   3394 }
   3395 
   3396 /*
   3397  * This function is used as a target for the rfs4_dbe_walk() call
   3398  * below.  The purpose of this function is to see if the state refers
   3399  * to a file that resides within the exportinfo export.  If so, then
   3400  * remove the open state for this object since the intent is the
   3401  * server is unexporting the specified directory.  The main result for
   3402  * this type of entry is to invalidate it such it will not be found in
   3403  * the future.
   3404  */
   3405 static void
   3406 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
   3407 {
   3408 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
   3409 	struct exportinfo *exi = (struct exportinfo *)e;
   3410 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
   3411 	fhandle_t *efhp;
   3412 
   3413 	efhp = (fhandle_t *)&exi->exi_fh;
   3414 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
   3415 
   3416 	FH_TO_FMT4(efhp, exi_fhp);
   3417 
   3418 	finfo_fhp =
   3419 	    (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
   3420 
   3421 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
   3422 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
   3423 	    exi_fhp->fh4_xlen) == 0) {
   3424 		rfs4_state_close(sp, TRUE, FALSE, CRED());
   3425 		rfs4_dbe_invalidate(sp->rs_dbe);
   3426 	}
   3427 }
   3428 
   3429 /*
   3430  * This function is used as a target for the rfs4_dbe_walk() call
   3431  * below.  The purpose of this function is to see if the state refers
   3432  * to a file that resides within the exportinfo export.  If so, then
   3433  * remove the deleg state for this object since the intent is the
   3434  * server is unexporting the specified directory.  The main result for
   3435  * this type of entry is to invalidate it such it will not be found in
   3436  * the future.
   3437  */
   3438 static void
   3439 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
   3440 {
   3441 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
   3442 	struct exportinfo *exi = (struct exportinfo *)e;
   3443 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
   3444 	fhandle_t *efhp;
   3445 
   3446 	efhp = (fhandle_t *)&exi->exi_fh;
   3447 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
   3448 
   3449 	FH_TO_FMT4(efhp, exi_fhp);
   3450 
   3451 	finfo_fhp =
   3452 	    (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
   3453 
   3454 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
   3455 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
   3456 	    exi_fhp->fh4_xlen) == 0) {
   3457 		rfs4_dbe_invalidate(dsp->rds_dbe);
   3458 	}
   3459 }
   3460 
   3461 /*
   3462  * This function is used as a target for the rfs4_dbe_walk() call
   3463  * below.  The purpose of this function is to see if the state refers
   3464  * to a file that resides within the exportinfo export.  If so, then
   3465  * release vnode hold for this object since the intent is the server
   3466  * is unexporting the specified directory.  Invalidation will prevent
   3467  * this struct from being found in the future.
   3468  */
   3469 static void
   3470 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
   3471 {
   3472 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
   3473 	struct exportinfo *exi = (struct exportinfo *)e;
   3474 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
   3475 	fhandle_t *efhp;
   3476 	nfs_server_instance_t *instp;
   3477 
   3478 	efhp = (fhandle_t *)&exi->exi_fh;
   3479 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
   3480 
   3481 	FH_TO_FMT4(efhp, exi_fhp);
   3482 
   3483 	finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
   3484 
   3485 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
   3486 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
   3487 	    exi_fhp->fh4_xlen) == 0) {
   3488 		if (fp->rf_vp) {
   3489 			vnode_t *vp = fp->rf_vp;
   3490 
   3491 			instp = dbe_to_instp(fp->rf_dbe);
   3492 			ASSERT(instp);
   3493 			/* don't leak monitors */
   3494 			if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_READ) {
   3495 				(void) fem_uninstall(vp, instp->deleg_rdops,
   3496 				    (void *)fp);
   3497 				vn_open_downgrade(vp, FREAD);
   3498 			} else if (fp->rf_dinfo->rd_dtype ==
   3499 			    OPEN_DELEGATE_WRITE) {
   3500 				(void) fem_uninstall(vp, instp->deleg_wrops,
   3501 				    (void *)fp);
   3502 				vn_open_downgrade(vp, FREAD|FWRITE);
   3503 			}
   3504 			mutex_enter(&vp->v_vsd_lock);
   3505 			(void) vsd_set(vp, instp->vkey, NULL);
   3506 			mutex_exit(&vp->v_vsd_lock);
   3507 			VN_RELE(vp);
   3508 			fp->rf_vp = NULL;
   3509 		}
   3510 		rfs4_dbe_invalidate(fp->rf_dbe);
   3511 	}
   3512 }
   3513 
   3514 /*
   3515  * v4 state cleaner
   3516  */
   3517 void
   3518 rfs4_clean_state_exi(nfs_server_instance_t *instp, struct exportinfo *exi)
   3519 {
   3520 	rfs4_dbe_walk(instp->lo_state_tab, rfs4_lo_state_walk_callout, exi);
   3521 	rfs4_dbe_walk(instp->state_tab, rfs4_state_walk_callout, exi);
   3522 	rfs4_dbe_walk(instp->deleg_state_tab, rfs4_deleg_state_walk_callout,
   3523 	    exi);
   3524 	rfs4_dbe_walk(instp->file_tab, rfs4_file_walk_callout, exi);
   3525 }
   3526 
   3527 /*
   3528  * Given a directory that is being unexported, cleanup/release
   3529  * state for all stateStore occurrences with refering objects.
   3530  */
   3531 void
   3532 sstor_clean_state_exi(struct exportinfo *exi)
   3533 {
   3534 	nfs_server_instance_t *nsip = list_head(&nsi_head);
   3535 
   3536 	while (nsip) {
   3537 		mutex_enter(&nsip->state_lock);
   3538 		if (nsip->inst_flags & NFS_INST_STORE_INIT) {
   3539 			if (nsip->exi_clean_func != NULL)
   3540 				(*nsip->exi_clean_func)(nsip, exi);
   3541 		}
   3542 		mutex_exit(&nsip->state_lock);
   3543 
   3544 		nsip = list_next(&nsi_head, &nsip->nsi_list);
   3545 	}
   3546 }
   3547 
   3548 /*
   3549  * v4 protocol Table Initialzation (common between 4.0 and 4.1)
   3550  */
   3551 void
   3552 v4prot_sstor_init(nfs_server_instance_t *instp)
   3553 {
   3554 	timespec32_t verf;
   3555 	int error;
   3556 
   3557 	/*
   3558 	 * Init the grace timers and reclaim list.
   3559 	 */
   3560 	instp->gstart_time = (time_t)0;
   3561 	instp->grace_period = (time_t)0;
   3562 	instp->lease_period = rfs4_lease_time;
   3563 
   3564 	rw_init(&instp->reclaimlst_lock, NULL, RW_DEFAULT, NULL);
   3565 
   3566 	list_create(&instp->reclaim_head, sizeof (rfs4_reclaim_t),
   3567 	    offsetof(rfs4_reclaim_t, reclaim_list));
   3568 
   3569 	/*
   3570 	 * set the various cache timers for table creation
   3571 	 */
   3572 	SSTOR_CT_INIT(instp, client_cache_time, CLIENT_CACHE_TIME);
   3573 	SSTOR_CT_INIT(instp, openowner_cache_time, OPENOWNER_CACHE_TIME);
   3574 	SSTOR_CT_INIT(instp, state_cache_time, STATE_CACHE_TIME);
   3575 	SSTOR_CT_INIT(instp, lo_state_cache_time, LO_STATE_CACHE_TIME);
   3576 	SSTOR_CT_INIT(instp, lockowner_cache_time, LOCKOWNER_CACHE_TIME);
   3577 	SSTOR_CT_INIT(instp, file_cache_time, FILE_CACHE_TIME);
   3578 	SSTOR_CT_INIT(instp, deleg_state_cache_time, DELEG_STATE_CACHE_TIME);
   3579 
   3580 	/*
   3581 	 * Get the door handle for stable storage upcalls.
   3582 	 */
   3583 	instp->dh = door_ki_lookup(nfs_doorfd);
   3584 	door_ki_hold(instp->dh);
   3585 
   3586 	/*
   3587 	 * Init the stable storage.
   3588 	 */
   3589 	rfs4_ss_retrieve_state(instp);
   3590 
   3591 	/*
   3592 	 * Client table.
   3593 	 */
   3594 	rw_init(&instp->findclient_lock, NULL, RW_DEFAULT, NULL);
   3595 
   3596 	instp->client_tab = rfs4_table_create(
   3597 	    instp, "Client", instp->client_cache_time, 2,
   3598 	    rfs4_client_create, rfs4_client_destroy, rfs4_client_expiry,
   3599 	    sizeof (rfs4_client_t), TABSIZE, MAXTABSZ/8, 100);
   3600 
   3601 	instp->nfsclnt_idx = rfs4_index_create(instp->client_tab,
   3602 	    "nfs_client_id4", nfsclnt_hash, nfsclnt_compare, nfsclnt_mkkey,
   3603 	    TRUE);
   3604 
   3605 	instp->clientid_idx = rfs4_index_create(instp->client_tab,
   3606 	    "client_id", clientid_hash, clientid_compare, clientid_mkkey,
   3607 	    FALSE);
   3608 
   3609 	/*
   3610 	 * File table.
   3611 	 */
   3612 	instp->file_tab = rfs4_table_create(instp,
   3613 	    "File", instp->file_cache_time, 1, rfs4_file_create,
   3614 	    rfs4_file_destroy, NULL, sizeof (rfs4_file_t),
   3615 	    TABSIZE, MAXTABSZ, -1);
   3616 
   3617 	instp->file_idx = rfs4_index_create(instp->file_tab,
   3618 	    "Filehandle", file_hash, file_compare, file_mkkey, TRUE);
   3619 
   3620 	/*
   3621 	 * Open Owner table.
   3622 	 */
   3623 	instp->openowner_tab = rfs4_table_create(
   3624 	    instp, "OpenOwner", instp->openowner_cache_time, 1,
   3625 	    openowner_create, openowner_destroy, rfs4_openowner_expiry,
   3626 	    sizeof (rfs4_openowner_t), TABSIZE, MAXTABSZ, 100);
   3627 
   3628 	instp->openowner_idx = rfs4_index_create(instp->openowner_tab,
   3629 	    "open_owner4", openowner_hash, openowner_compare, openowner_mkkey,
   3630 	    TRUE);
   3631 
   3632 	/*
   3633 	 * State table.
   3634 	 */
   3635 	instp->state_tab = rfs4_table_create(
   3636 	    instp, "OpenStateID", instp->state_cache_time, 3,
   3637 	    rfs4_state_create, rfs4_state_destroy, rfs4_state_expiry,
   3638 	    sizeof (rfs4_state_t), TABSIZE, MAXTABSZ, 100);
   3639 
   3640 	instp->state_owner_file_idx = rfs4_index_create(instp->state_tab,
   3641 	    "Openowner-File", state_owner_file_hash, state_owner_file_compare,
   3642 	    state_owner_file_mkkey, TRUE);
   3643 
   3644 	instp->state_idx = rfs4_index_create(instp->state_tab,
   3645 	    "State-id", state_hash, state_compare, state_mkkey, FALSE);
   3646 
   3647 	instp->state_file_idx = rfs4_index_create(instp->state_tab, "File",
   3648 	    state_file_hash, state_file_compare, state_file_mkkey, FALSE);
   3649 
   3650 	/*
   3651 	 * Lock Owner tables.
   3652 	 */
   3653 	instp->lo_state_tab = rfs4_table_create(
   3654 	    instp, "LockStateID", instp->lo_state_cache_time, 2,
   3655 	    rfs4_lo_state_create, rfs4_lo_state_destroy, rfs4_lo_state_expiry,
   3656 	    sizeof (rfs4_lo_state_t), TABSIZE, MAXTABSZ, 100);
   3657 
   3658 	instp->lo_state_owner_idx = rfs4_index_create(instp->lo_state_tab,
   3659 	    "lockowner_state", lo_state_lo_hash, lo_state_lo_compare,
   3660 	    lo_state_lo_mkkey, TRUE);
   3661 
   3662 	instp->lo_state_idx = rfs4_index_create(instp->lo_state_tab,
   3663 	    "State-id", lo_state_hash, lo_state_compare, lo_state_mkkey,
   3664 	    FALSE);
   3665 
   3666 	instp->lockowner_tab = rfs4_table_create(
   3667 	    instp, "Lockowner", instp->lockowner_cache_time, 2,
   3668 	    rfs4_lockowner_create, rfs4_lockowner_destroy,
   3669 	    rfs4_lockowner_expiry, sizeof (rfs4_lockowner_t), TABSIZE,
   3670 	    MAXTABSZ, 100);
   3671 
   3672 	instp->lockowner_idx = rfs4_index_create(instp->lockowner_tab,
   3673 	    "lock_owner4", lockowner_hash, lockowner_compare,
   3674 	    lockowner_mkkey, TRUE);
   3675 
   3676 	instp->lockowner_pid_idx = rfs4_index_create(instp->lockowner_tab,
   3677 	    "pid", pid_hash, pid_compare, pid_mkkey, FALSE);
   3678 
   3679 	/*
   3680 	 * Delegation state table
   3681 	 */
   3682 	instp->deleg_state_tab = rfs4_table_create(
   3683 	    instp, "DelegStateID", instp->deleg_state_cache_time, 2,
   3684 	    rfs4_deleg_state_create, rfs4_deleg_state_destroy,
   3685 	    rfs4_deleg_state_expiry, sizeof (rfs4_deleg_state_t),
   3686 	    TABSIZE, MAXTABSZ, 100);
   3687 
   3688 	instp->deleg_idx = rfs4_index_create(instp->deleg_state_tab,
   3689 	    "DelegByFileClient", deleg_hash, deleg_compare, deleg_mkkey,
   3690 	    TRUE);
   3691 
   3692 	instp->deleg_state_idx = rfs4_index_create(instp->deleg_state_tab,
   3693 	    "DelegState", deleg_state_hash, deleg_state_compare,
   3694 	    deleg_state_mkkey, FALSE);
   3695 
   3696 	mutex_init(&instp->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
   3697 
   3698 	/* Used to manage access to rfs4_deleg_policy */
   3699 	rw_init(&instp->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
   3700 
   3701 	instp->vkey = 0;
   3702 	vsd_create(&instp->vkey, NULL);
   3703 
   3704 	instp->lockt_sysid = lm_alloc_sysidt();
   3705 	instp->caller_id = fs_new_caller_id();
   3706 
   3707 	/*
   3708 	 * The following algorithm attempts to find a unique verifier
   3709 	 * to be used as the write verifier returned from the server
   3710 	 * to the client.  It is important that this verifier change
   3711 	 * whenever the server reboots.  Of secondary importance, it
   3712 	 * is important for the verifier to be unique between two
   3713 	 * different servers.
   3714 	 *
   3715 	 * Thus, an attempt is made to use the system hostid and the
   3716 	 * current time in seconds when the nfssrv kernel module is
   3717 	 * loaded.  It is assumed that an NFS server will not be able
   3718 	 * to boot and then to reboot in less than a second.  If the
   3719 	 * hostid has not been set, then the current high resolution
   3720 	 * time is used.  This will ensure different verifiers each
   3721 	 * time the server reboots and minimize the chances that two
   3722 	 * different servers will have the same verifier.
   3723 	 * XXX - this is broken on LP64 kernels.
   3724 	 */
   3725 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
   3726 	if (verf.tv_sec != 0) {
   3727 		verf.tv_nsec = gethrestime_sec();
   3728 	} else {
   3729 		timespec_t tverf;
   3730 
   3731 		gethrestime(&tverf);
   3732 		verf.tv_sec = (time_t)tverf.tv_sec;
   3733 		verf.tv_nsec = tverf.tv_nsec;
   3734 	}
   3735 
   3736 	instp->Write4verf = *(uint64_t *)&verf;
   3737 
   3738 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl,
   3739 	    &instp->deleg_rdops);
   3740 
   3741 	if (error == 0) {
   3742 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
   3743 		    &instp->deleg_wrops);
   3744 		if (error)
   3745 			fem_free(instp->deleg_rdops);
   3746 	}
   3747 
   3748 	if (error)
   3749 		rfs4_disable_delegation(instp);
   3750 }
   3751 
   3752 /*
   3753  * Used to initialize NFSv4.0 server's state.  All of the tables are
   3754  * created and timers are set. Only called when an occurrence
   3755  * of NFSv4.0 is needed.
   3756  */
   3757 void
   3758 rfs4_sstor_init(nfs_server_instance_t *instp)
   3759 {
   3760 	extern boolean_t rfs4_cpr_callb(void *, int);
   3761 	extern void rfs4_do_cb_recall(rfs4_deleg_state_t *, bool_t);
   3762 	extern rfs4_cbstate_t rfs4_cbcheck(rfs4_state_t *);
   3763 
   3764 	int  need_sstor_init;
   3765 
   3766 	/*
   3767 	 * Create the state store and set the
   3768 	 * start-up time.
   3769 	 */
   3770 	need_sstor_init = sstor_init(instp, 60);
   3771 
   3772 	if (need_sstor_init == 0)
   3773 		return;
   3774 
   3775 	instp->deleg_cbrecall = rfs4_do_cb_recall;
   3776 	instp->deleg_cbcheck =  rfs4_cbcheck;
   3777 
   3778 	/*
   3779 	 * Add a CPR callback so that we can update client
   3780 	 * access times to extend the lease after a suspend
   3781 	 * and resume (we use same class as rpcmod/connmgr)
   3782 	 */
   3783 	instp->cpr_id = callb_add(rfs4_cpr_callb, instp, CB_CL_CPR_RPC,
   3784 	    instp->inst_name);
   3785 
   3786 	/*
   3787 	 * Make the NFSv4.0 protocol tables and indexes.
   3788 	 */
   3789 	v4prot_sstor_init(instp);
   3790 
   3791 	instp->attrvers = 0;
   3792 
   3793 	/*
   3794 	 * Mark it as fully initialized
   3795 	 */
   3796 	instp->inst_flags |= NFS_INST_STORE_INIT | NFS_INST_v40;
   3797 
   3798 	/*
   3799 	 * Clear out any old init state.
   3800 	 */
   3801 	instp->inst_flags &= ~NFS_INST_TERMINUS;
   3802 
   3803 	mutex_exit(&instp->state_lock);
   3804 }
   3805 
   3806 /*
   3807  * Used at server occurrence shutdown to cleanup all of the NFSv4.0
   3808  * structures and other state.
   3809  */
   3810 void
   3811 rfs4_sstor_fini(nfs_server_instance_t *instp)
   3812 {
   3813 	rfs4_database_t *dbp;
   3814 
   3815 	mutex_enter(&instp->state_lock);
   3816 
   3817 	if (instp->state_store == NULL) {
   3818 		mutex_exit(&instp->state_lock);
   3819 		return;
   3820 	}
   3821 
   3822 	/*
   3823 	 * Mark it as being terminated.
   3824 	 */
   3825 	instp->inst_flags |= NFS_INST_TERMINUS;
   3826 
   3827 	rfs4_set_deleg_policy(instp, SRV_NEVER_DELEGATE);
   3828 	dbp = instp->state_store;
   3829 
   3830 	/*
   3831 	 * Cleanup the kspe policies.
   3832 	 */
   3833 	nfs41_spe_fini();
   3834 
   3835 	/*
   3836 	 * Cleanup the CPR callback.
   3837 	 */
   3838 	if (instp->cpr_id)
   3839 		(void) callb_delete(instp->cpr_id);
   3840 
   3841 	rw_destroy(&instp->findclient_lock);
   3842 
   3843 	/* First stop all of the reaper threads in the database */
   3844 	rfs4_database_shutdown(dbp);
   3845 
   3846 	instp->state_store = NULL;
   3847 
   3848 	/* clean up any dangling stable storage structures */
   3849 	rfs4_ss_fini(instp);
   3850 
   3851 	/* Now actually destroy/release the database and its tables */
   3852 	rfs4_database_destroy(dbp);
   3853 
   3854 	/* If the mds, then cleanup the id_space for mds_mpd */
   3855 	if (instp->mds_mpd_id_space) {
   3856 		id_space_destroy(instp->mds_mpd_id_space);
   3857 	}
   3858 
   3859 	mutex_exit(&instp->state_lock);
   3860 
   3861 	rw_destroy(&instp->reclaimlst_lock);
   3862 	list_destroy(&instp->reclaim_head);
   3863 
   3864 	/* reset the "first NFSv4 request" status */
   3865 	instp->seen_first_compound = 0;
   3866 
   3867 	/* DSS: distributed stable storage */
   3868 	if (rfs4_dss_oldpaths)
   3869 		nvlist_free(rfs4_dss_oldpaths);
   3870 	if (rfs4_dss_paths)
   3871 		nvlist_free(rfs4_dss_paths);
   3872 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
   3873 
   3874 	/*
   3875 	 * Clear out that it was initialized.
   3876 	 */
   3877 	instp->inst_flags &= ~(NFS_INST_STORE_INIT|NFS_INST_v40|
   3878 	    NFS_INST_v41|NFS_INST_DS);
   3879 }
   3880