Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
     27 /* All Rights Reserved */
     28 
     29 
     30 #include <nfs/nfs4_clnt.h>
     31 #include <nfs/rnode4.h>
     32 #include <sys/systm.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/atomic.h>
     35 
     36 static void	nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
     37 static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
     38 				nfs4_oo_hash_bucket_t *, mntinfo4_t *);
     39 static open_delegation_type4 get_dtype(rnode4_t *);
     40 
#ifdef DEBUG
/*
 * DEBUG-only knobs.
 * NOTE(review): nfs4_client_foo_debug and nfs4_client_open_dg are not
 * referenced in the portion of this file reviewed here; confirm their
 * consumers before touching them.
 */
int nfs4_client_foo_debug = 0x0;
int nfs4_client_open_dg = 0x0;
/*
 * If this is non-zero, the lockowner and openowner seqid sync primitives
 * will intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif
     50 
/*
 * Special stateid with every bit clear: the first member is 0 and all
 * twelve bytes of the second member are 0.
 */
stateid4 clnt_special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

/*
 * Special stateid with every bit set: the first member is 0xffffffff and
 * all twelve bytes of the second member are 0xff.
 */
stateid4 clnt_special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};
     64 
     65 /* finds hash bucket and locks it */
     66 static nfs4_oo_hash_bucket_t *
     67 lock_bucket(cred_t *cr, mntinfo4_t *mi)
     68 {
     69 	nfs4_oo_hash_bucket_t *bucketp;
     70 	uint32_t hash_key;
     71 
     72 	hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
     73 	    % NFS4_NUM_OO_BUCKETS;
     74 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
     75 	    "hash_key %d for cred %p", hash_key, (void*)cr));
     76 
     77 	ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
     78 	ASSERT(mi != NULL);
     79 	ASSERT(mutex_owned(&mi->mi_lock));
     80 
     81 	bucketp = &(mi->mi_oo_list[hash_key]);
     82 	mutex_enter(&bucketp->b_lock);
     83 	return (bucketp);
     84 }
     85 
/*
 * Unlocks the hash bucket pointed to by 'bucketp' by dropping its b_lock.
 * Counterpart of lock_bucket().
 */
static void
unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
{
	mutex_exit(&bucketp->b_lock);
}
     92 
     93 /*
     94  * Removes the lock owner from the rnode's lock_owners list and frees the
     95  * corresponding reference.
     96  */
     97 void
     98 nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
     99 {
    100 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    101 	    "nfs4_rnode_remove_lock_owner"));
    102 
    103 	mutex_enter(&rp->r_statev4_lock);
    104 
    105 	if (lop->lo_next_rnode == NULL) {
    106 		/* already removed from list */
    107 		mutex_exit(&rp->r_statev4_lock);
    108 		return;
    109 	}
    110 
    111 	ASSERT(lop->lo_prev_rnode != NULL);
    112 
    113 	lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
    114 	lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
    115 
    116 	lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
    117 
    118 	mutex_exit(&rp->r_statev4_lock);
    119 
    120 	/*
    121 	 * This would be an appropriate place for
    122 	 * RELEASE_LOCKOWNER.  For now, this is overkill
    123 	 * because in the common case, close is going to
    124 	 * release any lockowners anyway.
    125 	 */
    126 	lock_owner_rele(lop);
    127 }
    128 
    129 /*
    130  * Remove all lock owners from the rnode's lock_owners list.  Frees up
    131  * their references from the list.
    132  */
    133 
    134 void
    135 nfs4_flush_lock_owners(rnode4_t *rp)
    136 {
    137 	nfs4_lock_owner_t *lop;
    138 
    139 	mutex_enter(&rp->r_statev4_lock);
    140 	while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
    141 		lop = rp->r_lo_head.lo_next_rnode;
    142 		lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
    143 		lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
    144 		lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
    145 		lock_owner_rele(lop);
    146 	}
    147 	mutex_exit(&rp->r_statev4_lock);
    148 }
    149 
    150 void
    151 nfs4_clear_open_streams(rnode4_t *rp)
    152 {
    153 	nfs4_open_stream_t *osp;
    154 
    155 	mutex_enter(&rp->r_os_lock);
    156 	while ((osp = list_head(&rp->r_open_streams)) != NULL) {
    157 		open_owner_rele(osp->os_open_owner);
    158 		list_remove(&rp->r_open_streams, osp);
    159 		mutex_destroy(&osp->os_sync_lock);
    160 		osp->os_open_owner = NULL;
    161 		kmem_free(osp, sizeof (*osp));
    162 	}
    163 	mutex_exit(&rp->r_os_lock);
    164 }
    165 
    166 void
    167 open_owner_hold(nfs4_open_owner_t *oop)
    168 {
    169 	mutex_enter(&oop->oo_lock);
    170 	oop->oo_ref_count++;
    171 	mutex_exit(&oop->oo_lock);
    172 }
    173 
    174 /*
    175  * Frees the open owner if the ref count hits zero.
    176  */
    177 void
    178 open_owner_rele(nfs4_open_owner_t *oop)
    179 {
    180 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    181 	    "open_owner_rele"));
    182 
    183 	mutex_enter(&oop->oo_lock);
    184 	oop->oo_ref_count--;
    185 	if (oop->oo_ref_count == 0) {
    186 		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    187 		    "open_owner_rele: freeing open owner"));
    188 		oop->oo_valid = 0;
    189 		mutex_exit(&oop->oo_lock);
    190 		/*
    191 		 * Ok, we don't destroy the open owner, nor do we put it on
    192 		 * the mntinfo4's free list just yet.  We are lazy about it
    193 		 * and let callers to find_open_owner() do that to keep locking
    194 		 * simple.
    195 		 */
    196 	} else {
    197 		mutex_exit(&oop->oo_lock);
    198 	}
    199 }
    200 
    201 void
    202 open_stream_hold(nfs4_open_stream_t *osp)
    203 {
    204 	mutex_enter(&osp->os_sync_lock);
    205 	osp->os_ref_count++;
    206 	mutex_exit(&osp->os_sync_lock);
    207 }
    208 
/*
 * Drops one reference on the open stream.  If the ref count drops to
 * zero, the stream is marked invalid, its reference on the open owner is
 * released, it is removed from the rnode4's open streams list, and it is
 * freed.
 *
 * The caller must not hold rp->r_os_lock (asserted); it is taken here
 * for the list removal.
 */
void
open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_stream_rele"));

	ASSERT(!mutex_owned(&rp->r_os_lock));

	mutex_enter(&osp->os_sync_lock);
	ASSERT(osp->os_ref_count > 0);
	osp->os_ref_count--;
	if (osp->os_ref_count == 0) {
		nfs4_open_owner_t *tmp_oop;

		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_stream_rele: freeing open stream"));
		/*
		 * Mark the stream invalid while os_sync_lock is still held;
		 * find_open_stream() skips streams whose os_valid is 0.
		 */
		osp->os_valid = 0;
		tmp_oop = osp->os_open_owner;
		mutex_exit(&osp->os_sync_lock);

		/* now see if we need to destroy the open owner */
		open_owner_rele(tmp_oop);

		/* os_sync_lock was dropped above, before taking r_os_lock */
		mutex_enter(&rp->r_os_lock);
		list_remove(&rp->r_open_streams, osp);
		mutex_exit(&rp->r_os_lock);

		/* free up osp */
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	} else {
		mutex_exit(&osp->os_sync_lock);
	}
}
    248 
    249 void
    250 lock_owner_hold(nfs4_lock_owner_t *lop)
    251 {
    252 	mutex_enter(&lop->lo_lock);
    253 	lop->lo_ref_count++;
    254 	mutex_exit(&lop->lo_lock);
    255 }
    256 
    257 /*
    258  * Frees the lock owner if the ref count hits zero and
    259  * the structure no longer has no locks.
    260  */
    261 void
    262 lock_owner_rele(nfs4_lock_owner_t *lop)
    263 {
    264 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    265 	    "lock_owner_rele"));
    266 
    267 	mutex_enter(&lop->lo_lock);
    268 	lop->lo_ref_count--;
    269 	if (lop->lo_ref_count == 0) {
    270 		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    271 		    "lock_owner_rele: freeing lock owner: "
    272 		    "%x", lop->lo_pid));
    273 		lop->lo_valid = 0;
    274 		/*
    275 		 * If there are no references, the lock_owner should
    276 		 * already be off the rnode's list.
    277 		 */
    278 		ASSERT(lop->lo_next_rnode == NULL);
    279 		ASSERT(lop->lo_prev_rnode == NULL);
    280 		ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
    281 		ASSERT(lop->lo_seqid_holder == NULL);
    282 		mutex_exit(&lop->lo_lock);
    283 
    284 		/* free up lop */
    285 		cv_destroy(&lop->lo_cv_seqid_sync);
    286 		mutex_destroy(&lop->lo_lock);
    287 		kmem_free(lop, sizeof (*lop));
    288 	} else {
    289 		mutex_exit(&lop->lo_lock);
    290 	}
    291 }
    292 
/*
 * Looks up the open owner for 'cr' on 'mi'.  The caller must already
 * hold mi->mi_lock (asserted); find_open_owner() is the variant that
 * takes it.
 *
 * This increments the open owner ref count if found.
 *
 * An entry matches when its cred compares equal to 'cr' (crcmp() returns
 * 0) and either its 'oo_just_created' value equals 'just_created', or
 * 'just_created' is NFS4_JUST_CREATED (in which case the entry's
 * 'oo_just_created' value is not considered).
 *
 * As a side effect, every non-matching invalid entry passed during the
 * walk is removed from the bucket and handed to nfs4_free_open_owner().
 *
 * If nothing in the bucket matches, the result of
 * find_freed_open_owner() is returned instead.
 */
nfs4_open_owner_t *
find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t	*oop = NULL, *next_oop;
	nfs4_oo_hash_bucket_t	*bucketp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_owner: cred %p, just_created %d",
	    (void*)cr, just_created));

	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = lock_bucket(cr, mi);

	/* got hash bucket, search through open owners */
	for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
		mutex_enter(&oop->oo_lock);
		if (!crcmp(oop->oo_cred, cr) &&
		    (oop->oo_just_created == just_created ||
		    just_created == NFS4_JUST_CREATED)) {
			/* match */
			if (oop->oo_valid == 0) {
				/* reactivate the open owner */
				oop->oo_valid = 1;
				ASSERT(oop->oo_ref_count == 0);
			}
			oop->oo_ref_count++;
			mutex_exit(&oop->oo_lock);
			unlock_bucket(bucketp);
			return (oop);
		}
		/* fetch the successor before oop may be unlinked below */
		next_oop = list_next(&bucketp->b_oo_hash_list, oop);
		if (oop->oo_valid == 0) {
			list_remove(&bucketp->b_oo_hash_list, oop);

			/*
			 * Now we go ahead and put this open owner
			 * on the freed list.  This is our lazy method.
			 */
			nfs4_free_open_owner(oop, mi);
		}

		mutex_exit(&oop->oo_lock);
		oop = next_oop;
	}

	/* search through recently freed open owners */
	oop = find_freed_open_owner(cr, bucketp, mi);

	unlock_bucket(bucketp);

	return (oop);
}
    352 
    353 nfs4_open_owner_t *
    354 find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
    355 {
    356 	nfs4_open_owner_t *oop;
    357 
    358 	mutex_enter(&mi->mi_lock);
    359 	oop = find_open_owner_nolock(cr, just_created, mi);
    360 	mutex_exit(&mi->mi_lock);
    361 
    362 	return (oop);
    363 }
    364 
    365 /*
    366  * This increments osp's ref count if found.
    367  * Returns with 'os_sync_lock' held.
    368  */
    369 nfs4_open_stream_t *
    370 find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
    371 {
    372 	nfs4_open_stream_t	*osp;
    373 
    374 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    375 	    "find_open_stream"));
    376 
    377 	mutex_enter(&rp->r_os_lock);
    378 	/* Now, no one can add or delete to rp's open streams list */
    379 	for (osp = list_head(&rp->r_open_streams); osp != NULL;
    380 	    osp = list_next(&rp->r_open_streams, osp)) {
    381 		mutex_enter(&osp->os_sync_lock);
    382 		if (osp->os_open_owner == oop && osp->os_valid != 0) {
    383 			/* match */
    384 			NFS4_DEBUG(nfs4_client_state_debug,
    385 			    (CE_NOTE, "find_open_stream "
    386 			    "got a match"));
    387 
    388 			osp->os_ref_count++;
    389 			mutex_exit(&rp->r_os_lock);
    390 			return (osp);
    391 		}
    392 		mutex_exit(&osp->os_sync_lock);
    393 	}
    394 
    395 	mutex_exit(&rp->r_os_lock);
    396 	return (NULL);
    397 }
    398 
    399 /*
    400  * Find the lock owner for the given file and process ID.  If "which" is
    401  * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
    402  * from the server.
    403  *
    404  * This increments the lock owner's ref count if found.  Returns NULL if
    405  * there was no match.
    406  */
    407 nfs4_lock_owner_t *
    408 find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
    409 {
    410 	nfs4_lock_owner_t	*lop, *next_lop;
    411 
    412 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    413 	    "find_lock_owner: pid %x, which %d", pid, which));
    414 
    415 	ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);
    416 
    417 	/* search by pid */
    418 	mutex_enter(&rp->r_statev4_lock);
    419 
    420 	lop = rp->r_lo_head.lo_next_rnode;
    421 	while (lop != &rp->r_lo_head) {
    422 		mutex_enter(&lop->lo_lock);
    423 		if (lop->lo_pid == pid && lop->lo_valid != 0 &&
    424 		    !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
    425 			if (which == LOWN_ANY ||
    426 			    lop->lo_just_created != NFS4_JUST_CREATED) {
    427 				/* Found a matching lock owner */
    428 				NFS4_DEBUG(nfs4_client_state_debug,
    429 				    (CE_NOTE, "find_lock_owner: "
    430 				    "got a match"));
    431 
    432 				lop->lo_ref_count++;
    433 				mutex_exit(&lop->lo_lock);
    434 				mutex_exit(&rp->r_statev4_lock);
    435 				return (lop);
    436 			}
    437 		}
    438 		next_lop = lop->lo_next_rnode;
    439 		mutex_exit(&lop->lo_lock);
    440 		lop = next_lop;
    441 	}
    442 
    443 	mutex_exit(&rp->r_statev4_lock);
    444 	return (NULL);
    445 }
    446 
    447 /*
    448  * This returns the delegation stateid as 'sid'. Returns 1 if a successful
    449  * delegation stateid was found, otherwise returns 0.
    450  */
    451 
    452 static int
    453 nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
    454 {
    455 	ASSERT(!mutex_owned(&rp->r_statev4_lock));
    456 
    457 	mutex_enter(&rp->r_statev4_lock);
    458 	if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
    459 	    (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
    460 	    !rp->r_deleg_return_pending) {
    461 
    462 		*sid = rp->r_deleg_stateid;
    463 		mutex_exit(&rp->r_statev4_lock);
    464 		return (1);
    465 	}
    466 	mutex_exit(&rp->r_statev4_lock);
    467 	return (0);
    468 }
    469 
    470 /*
    471  * This returns the lock stateid as 'sid'. Returns 1 if a successful lock
    472  * stateid was found, otherwise returns 0.
    473  */
    474 static int
    475 nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
    476 {
    477 	nfs4_lock_owner_t *lop;
    478 
    479 	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
    480 
    481 	if (lop) {
    482 		/*
    483 		 * Found a matching lock owner, so use a lock
    484 		 * stateid rather than an open stateid.
    485 		 */
    486 		mutex_enter(&lop->lo_lock);
    487 		*sid = lop->lock_stateid;
    488 		mutex_exit(&lop->lo_lock);
    489 		lock_owner_rele(lop);
    490 		return (1);
    491 	}
    492 
    493 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    494 	    "nfs4_get_lock_stateid: no lop"));
    495 	return (0);
    496 }
    497 
    498 /*
    499  * This returns the open stateid as 'sid'. Returns 1 if a successful open
    500  * stateid was found, otherwise returns 0.
    501  *
    502  * Once the stateid is returned to the caller, it is no longer protected;
    503  * so the caller must be prepared to handle OLD/BAD_STATEID where
    504  * appropiate.
    505  */
    506 static int
    507 nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
    508 {
    509 	nfs4_open_owner_t *oop;
    510 	nfs4_open_stream_t *osp;
    511 
    512 	ASSERT(mi != NULL);
    513 
    514 	oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
    515 	if (!oop) {
    516 		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    517 		    "nfs4_get_open_stateid: no oop"));
    518 		return (0);
    519 	}
    520 
    521 	osp = find_open_stream(oop, rp);
    522 	open_owner_rele(oop);
    523 	if (!osp) {
    524 		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    525 		    "nfs4_get_open_stateid: no osp"));
    526 		return (0);
    527 	}
    528 
    529 	if (osp->os_failed_reopen) {
    530 		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    531 		    "nfs4_get_open_stateid: osp %p failed reopen",
    532 		    (void *)osp));
    533 		mutex_exit(&osp->os_sync_lock);
    534 		open_stream_rele(osp, rp);
    535 		return (0);
    536 	}
    537 	*sid = osp->open_stateid;
    538 	mutex_exit(&osp->os_sync_lock);
    539 	open_stream_rele(osp, rp);
    540 	return (1);
    541 }
    542 
    543 /*
    544  * Returns the delegation stateid if this 'op' is OP_WRITE and the
    545  * delegation we hold is a write delegation, OR this 'op' is not
    546  * OP_WRITE and we have a delegation held (read or write), otherwise
    547  * returns the lock stateid if there is a lock owner, otherwise
    548  * returns the open stateid if there is a open stream, otherwise
    549  * returns special stateid <seqid = 0, other = 0>.
    550  *
    551  * Used for WRITE operations.
    552  */
    553 stateid4
    554 nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    555 	nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
    556 {
    557 	stateid4 sid;
    558 
    559 	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
    560 		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
    561 			sid_tp->cur_sid_type = DEL_SID;
    562 			return (sid);
    563 		}
    564 	}
    565 	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
    566 		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
    567 			sid_tp->cur_sid_type = LOCK_SID;
    568 			return (sid);
    569 		}
    570 	}
    571 	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
    572 		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
    573 			sid_tp->cur_sid_type = OPEN_SID;
    574 			return (sid);
    575 		}
    576 	}
    577 	bzero(&sid, sizeof (stateid4));
    578 	sid_tp->cur_sid_type = SPEC_SID;
    579 	return (sid);
    580 }
    581 
    582 /*
    583  * Returns the delegation stateid if this 'op' is OP_WRITE and the
    584  * delegation we hold is a write delegation, OR this 'op' is not
    585  * OP_WRITE and we have a delegation held (read or write), otherwise
    586  * returns the lock stateid if there is a lock owner, otherwise
    587  * returns the open stateid if there is a open stream, otherwise
    588  * returns special stateid <seqid = 0, other = 0>.
    589  *
    590  * This also updates which stateid we are using in 'sid_tp', skips
    591  * previously attempted stateids, and skips checking higher priority
    592  * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
    593  * for async reads.
    594  *
    595  * Used for READ and SETATTR operations.
    596  */
    597 stateid4
    598 nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    599 	nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
    600 {
    601 	stateid4 sid;
    602 
    603 	/*
    604 	 * For asynchronous READs, do not attempt to retry from the start of
    605 	 * the stateid priority list, just continue from where you last left
    606 	 * off.
    607 	 */
    608 	if (async_read) {
    609 		switch (sid_tp->cur_sid_type) {
    610 		case NO_SID:
    611 			break;
    612 		case DEL_SID:
    613 			goto lock_stateid;
    614 		case LOCK_SID:
    615 			goto open_stateid;
    616 		case OPEN_SID:
    617 			goto special_stateid;
    618 		case SPEC_SID:
    619 		default:
    620 			cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
    621 			    "stateid type %d", sid_tp->cur_sid_type);
    622 		}
    623 	}
    624 
    625 	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
    626 		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
    627 			sid_tp->cur_sid_type = DEL_SID;
    628 			return (sid);
    629 		}
    630 	}
    631 lock_stateid:
    632 	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
    633 		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
    634 			sid_tp->cur_sid_type = LOCK_SID;
    635 			return (sid);
    636 		}
    637 	}
    638 open_stateid:
    639 	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
    640 		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
    641 			sid_tp->cur_sid_type = OPEN_SID;
    642 			return (sid);
    643 		}
    644 	}
    645 special_stateid:
    646 	bzero(&sid, sizeof (stateid4));
    647 	sid_tp->cur_sid_type = SPEC_SID;
    648 	return	(sid);
    649 }
    650 
/*
 * Stores 'stateid' as the lock stateid of 'lop', under lo_lock.
 * The lock owner must have NFS4_LOCK_SEQID_INUSE set in lo_flags
 * (asserted).
 */
void
nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_stateid"));

	ASSERT(lop);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	mutex_enter(&lop->lo_lock);
	lop->lock_stateid = stateid;
	mutex_exit(&lop->lo_lock);
}
    664 
/*
 * Sequence number used when a new open owner is needed.
 * This is used so as to not confuse the server.  Since an open owner
 * is based off of a cred, a cred could be re-used quickly, and the server
 * may not release all state for a cred.
 */
static uint64_t open_owner_seq_num = 0;

/*
 * Returns a fresh open owner name: open_owner_seq_num is atomically
 * incremented by one and the new value is returned.
 */
uint64_t
nfs4_get_new_oo_name(void)
{
	return (atomic_add_64_nv(&open_owner_seq_num, 1));
}
    678 
    679 /*
    680  * Create a new open owner and add it to the open owner hash table.
    681  */
    682 nfs4_open_owner_t *
    683 create_open_owner(cred_t *cr, mntinfo4_t *mi)
    684 {
    685 	nfs4_open_owner_t	*oop;
    686 	nfs4_oo_hash_bucket_t	*bucketp;
    687 
    688 	oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
    689 	/*
    690 	 * Make sure the cred doesn't go away when we put this open owner
    691 	 * on the free list, as well as make crcmp() a valid check.
    692 	 */
    693 	crhold(cr);
    694 	oop->oo_cred = cr;
    695 	mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
    696 	oop->oo_ref_count = 1;
    697 	oop->oo_valid = 1;
    698 	oop->oo_just_created = NFS4_JUST_CREATED;
    699 	oop->oo_seqid = 0;
    700 	oop->oo_seqid_inuse = 0;
    701 	oop->oo_last_good_seqid = 0;
    702 	oop->oo_last_good_op = TAG_NONE;
    703 	oop->oo_cred_otw = NULL;
    704 	cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
    705 
    706 	/*
    707 	 * A Solaris open_owner is <oo_seq_num>
    708 	 */
    709 	oop->oo_name = nfs4_get_new_oo_name();
    710 
    711 	/* now add the struct into the cred hash table */
    712 	ASSERT(mutex_owned(&mi->mi_lock));
    713 	bucketp = lock_bucket(cr, mi);
    714 	list_insert_head(&bucketp->b_oo_hash_list, oop);
    715 	unlock_bucket(bucketp);
    716 
    717 	return (oop);
    718 }
    719 
    720 /*
    721  * Create a new open stream and it to the rnode's list.
    722  * Increments the ref count on oop.
    723  * Returns with 'os_sync_lock' held.
    724  */
    725 nfs4_open_stream_t *
    726 create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
    727 {
    728 	nfs4_open_stream_t	*osp;
    729 
    730 #ifdef DEBUG
    731 	mutex_enter(&oop->oo_lock);
    732 	ASSERT(oop->oo_seqid_inuse);
    733 	mutex_exit(&oop->oo_lock);
    734 #endif
    735 
    736 	osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
    737 	osp->os_open_ref_count = 1;
    738 	osp->os_mapcnt = 0;
    739 	osp->os_ref_count = 2;
    740 	osp->os_valid = 1;
    741 	osp->os_open_owner = oop;
    742 	osp->os_orig_oo_name = oop->oo_name;
    743 	bzero(&osp->open_stateid, sizeof (stateid4));
    744 	osp->os_share_acc_read = 0;
    745 	osp->os_share_acc_write = 0;
    746 	osp->os_mmap_read = 0;
    747 	osp->os_mmap_write = 0;
    748 	osp->os_share_deny_none = 0;
    749 	osp->os_share_deny_read = 0;
    750 	osp->os_share_deny_write = 0;
    751 	osp->os_delegation = 0;
    752 	osp->os_dc_openacc = 0;
    753 	osp->os_final_close = 0;
    754 	osp->os_pending_close = 0;
    755 	osp->os_failed_reopen = 0;
    756 	osp->os_force_close = 0;
    757 	mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);
    758 
    759 	/* open owner gets a reference */
    760 	open_owner_hold(oop);
    761 
    762 	/* now add the open stream to rp */
    763 	mutex_enter(&rp->r_os_lock);
    764 	mutex_enter(&osp->os_sync_lock);
    765 	list_insert_head(&rp->r_open_streams, osp);
    766 	mutex_exit(&rp->r_os_lock);
    767 
    768 	return (osp);
    769 }
    770 
    771 /*
    772  * Returns an open stream with 'os_sync_lock' held.
    773  * If the open stream is found (rather than created), its
    774  * 'os_open_ref_count' is bumped.
    775  *
    776  * There is no race with two threads entering this function
    777  * and creating two open streams for the same <oop, rp> pair.
    778  * This is because the open seqid sync must be acquired, thus
    779  * only allowing one thread in at a time.
    780  */
    781 nfs4_open_stream_t *
    782 find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
    783 	int *created_osp)
    784 {
    785 	nfs4_open_stream_t *osp;
    786 
    787 #ifdef DEBUG
    788 	mutex_enter(&oop->oo_lock);
    789 	ASSERT(oop->oo_seqid_inuse);
    790 	mutex_exit(&oop->oo_lock);
    791 #endif
    792 
    793 	osp = find_open_stream(oop, rp);
    794 	if (!osp) {
    795 		osp = create_open_stream(oop, rp);
    796 		if (osp)
    797 			*created_osp = 1;
    798 	} else {
    799 		*created_osp = 0;
    800 		osp->os_open_ref_count++;
    801 	}
    802 
    803 	return (osp);
    804 }
    805 
    806 static uint64_t lock_owner_seq_num = 0;
    807 
    808 /*
    809  * Create a new lock owner and add it to the rnode's list.
    810  * Assumes the rnode's r_statev4_lock is held.
    811  * The created lock owner has a reference count of 2: one for the list and
    812  * one for the caller to use.  Returns the lock owner locked down.
    813  */
    814 nfs4_lock_owner_t *
    815 create_lock_owner(rnode4_t *rp, pid_t pid)
    816 {
    817 	nfs4_lock_owner_t	*lop;
    818 
    819 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    820 	    "create_lock_owner: pid %x", pid));
    821 
    822 	ASSERT(mutex_owned(&rp->r_statev4_lock));
    823 
    824 	lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
    825 	lop->lo_ref_count = 2;
    826 	lop->lo_valid = 1;
    827 	bzero(&lop->lock_stateid, sizeof (stateid4));
    828 	lop->lo_pid = pid;
    829 	lop->lock_seqid = 0;
    830 	lop->lo_pending_rqsts = 0;
    831 	lop->lo_just_created = NFS4_JUST_CREATED;
    832 	lop->lo_flags = 0;
    833 	lop->lo_seqid_holder = NULL;
    834 
    835 	/*
    836 	 * A Solaris lock_owner is <seq_num><pid>
    837 	 */
    838 	lop->lock_owner_name.ln_seq_num =
    839 	    atomic_add_64_nv(&lock_owner_seq_num, 1);
    840 	lop->lock_owner_name.ln_pid = pid;
    841 
    842 	cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
    843 	mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);
    844 
    845 	mutex_enter(&lop->lo_lock);
    846 
    847 	/* now add the lock owner to rp */
    848 	lop->lo_prev_rnode = &rp->r_lo_head;
    849 	lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
    850 	rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
    851 	rp->r_lo_head.lo_next_rnode = lop;
    852 
    853 	return (lop);
    854 
    855 }
    856 
/*
 * This sets the lock seqid of a lock owner.
 * The lock owner must have NFS4_LOCK_SEQID_INUSE set in lo_flags
 * (asserted); lo_lock is not taken here.
 */
void
nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_seqid"));

	ASSERT(lop != NULL);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	lop->lock_seqid = seqid;
}
    871 
    872 static void
    873 nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
    874 {
    875 	nfs4_lo_name_t *cast_namep;
    876 
    877 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    878 	    "nfs4_set_new_lock_owner_args"));
    879 
    880 	owner->owner_len = sizeof (*cast_namep);
    881 	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
    882 	/*
    883 	 * A Solaris lock_owner is <seq_num><pid>
    884 	 */
    885 	cast_namep = (nfs4_lo_name_t *)owner->owner_val;
    886 	cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1);
    887 	cast_namep->ln_pid = pid;
    888 }
    889 
    890 /*
    891  * Fill in the lock owner args.
    892  */
    893 void
    894 nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
    895 {
    896 	nfs4_lock_owner_t *lop;
    897 
    898 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
    899 	    "nfs4_setlockowner_args"));
    900 
    901 	/* This increments lop's ref count */
    902 	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
    903 
    904 	if (!lop)
    905 		goto make_up_args;
    906 
    907 	mutex_enter(&lop->lo_lock);
    908 	owner->owner_len = sizeof (lop->lock_owner_name);
    909 	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
    910 	bcopy(&lop->lock_owner_name, owner->owner_val,
    911 	    owner->owner_len);
    912 	mutex_exit(&lop->lo_lock);
    913 	lock_owner_rele(lop);
    914 	return;
    915 
    916 make_up_args:
    917 	nfs4_set_new_lock_owner_args(owner, pid);
    918 }
    919 
/*
 * This ends our use of the open owner's open seqid by clearing
 * 'oo_seqid_inuse' and issuing a cv_broadcast to wake up the threads
 * waiting to use the open seqid.
 *
 * The open seqid must currently be in use (asserted).
 */

void
nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid_inuse = 0;
	/* waiters re-check oo_seqid_inuse in nfs4_start_open_seqid_sync() */
	cv_broadcast(&oop->oo_cv_seqid_sync);
	mutex_exit(&oop->oo_lock);
}
    935 
    936 /*
    937  * This starts our use of the open owner's open seqid by setting
    938  * the oo_seqid_inuse to true.  We will wait (forever) with a
    939  * cv_wait() until we are woken up.
    940  *
    941  * Return values:
    942  * 0		no problems
    943  * EAGAIN	caller should retry (like a recovery retry)
    944  */
    945 int
    946 nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
    947 {
    948 	int error = 0;
    949 #ifdef DEBUG
    950 	static int ops = 0;		/* fault injection */
    951 #endif
    952 
    953 #ifdef DEBUG
    954 	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
    955 	    ++ops % 5 == 0)
    956 		return (EAGAIN);
    957 #endif
    958 
    959 	mutex_enter(&mi->mi_lock);
    960 	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
    961 	    curthread != mi->mi_recovthread)
    962 		error = EAGAIN;
    963 	mutex_exit(&mi->mi_lock);
    964 	if (error != 0)
    965 		goto done;
    966 
    967 	mutex_enter(&oop->oo_lock);
    968 
    969 	while (oop->oo_seqid_inuse) {
    970 		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
    971 		    "nfs4_start_open_seqid_sync waiting on cv"));
    972 
    973 		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
    974 	}
    975 
    976 	oop->oo_seqid_inuse = 1;
    977 
    978 	mutex_exit(&oop->oo_lock);
    979 
    980 	mutex_enter(&mi->mi_lock);
    981 	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
    982 	    curthread != mi->mi_recovthread)
    983 		error = EAGAIN;
    984 	mutex_exit(&mi->mi_lock);
    985 
    986 	if (error == EAGAIN)
    987 		nfs4_end_open_seqid_sync(oop);
    988 
    989 	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
    990 	    "nfs4_start_open_seqid_sync: error=%d", error));
    991 
    992 done:
    993 	return (error);
    994 }
    995 
#ifdef	DEBUG
/*
 * Debug counters bumped by nfs4_is_otw_open_necessary() each time an
 * over-the-wire OPEN is skipped: [0] when an existing open stream already
 * covers the request, [1] when a held delegation covers it.
 */
int bypass_otw[2];
#endif
    999 
/*
 * Checks to see if the OPEN OTW is necessary that is, if it's already
 * been opened with the same access and deny bits we are now asking for.
 * Note, this assumes that vp is backed by an rnode4 (VTOR4(vp) is used).
 *
 * The caller must hold rp->r_deleg_recall_lock as reader (asserted) and,
 * on the "can skip" path through an existing open stream, must own the
 * open owner's seqid (asserted).
 *
 * Return values:
 * 1	the OPEN must go over the wire; *errorp is left untouched
 * 0	the OPEN can be satisfied locally; *errorp is set to 0 and the
 *	open stream's share counts have been updated
 *
 * 'just_been_created' non-zero suppresses the share/ref count bumps when
 * an existing open stream is reused.  'acc' carries the VREAD/VWRITE bits
 * checked on the delegation-only path; 'rsp' must be non-NULL on that
 * path (asserted).
 */
int
nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
	int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
{
	rnode4_t *rp;
	nfs4_open_stream_t *osp;
	open_delegation_type4 dt;

	rp = VTOR4(vp);

	/*
	 * Grab the delegation type.  This function is protected against
	 * the delegation being returned by virtue of start_op (called
	 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode,
	 * delegreturn requires this lock in write mode to proceed.
	 */
	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
	dt = get_dtype(rp);

	/* returns with 'os_sync_lock' held */
	osp = find_open_stream(oop, rp);

	if (osp) {
		/* set to 1 as soon as any check says we must go OTW */
		uint32_t	do_otw = 0;

		if (osp->os_failed_reopen) {
			NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: os_failed_reopen "
			    "set on osp %p, cr %p, rp %s", (void *)osp,
			    (void *)osp->os_open_owner->oo_cred,
			    rnode4info(rp)));
			do_otw = 1;
		}

		/*
		 * check access/deny bits: a read open needs an existing
		 * read share or any delegation; a write open needs an
		 * existing write share or a write delegation.
		 */
		if (!do_otw && (flag & FREAD))
			if (osp->os_share_acc_read == 0 &&
			    dt == OPEN_DELEGATE_NONE)
				do_otw = 1;

		if (!do_otw && (flag & FWRITE))
			if (osp->os_share_acc_write == 0 &&
			    dt != OPEN_DELEGATE_WRITE)
				do_otw = 1;

		if (!do_otw) {
			NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: can skip this "
			    "open OTW"));
			if (!just_been_created) {
				osp->os_open_ref_count++;
				if (flag & FREAD)
					osp->os_share_acc_read++;
				if (flag & FWRITE)
					osp->os_share_acc_write++;
				osp->os_share_deny_none++;
			}

			/*
			 * Need to reset this bitfield for the possible case
			 * where we were going to OTW CLOSE the file, got a
			 * non-recoverable error, and before we could retry
			 * the CLOSE, OPENed the file again.
			 */
			ASSERT(osp->os_open_owner->oo_seqid_inuse);
			osp->os_final_close = 0;
			osp->os_force_close = 0;

			/* drop the lock and reference from find_open_stream */
			mutex_exit(&osp->os_sync_lock);
			open_stream_rele(osp, rp);

#ifdef	DEBUG
			bypass_otw[0]++;
#endif

			*errorp = 0;
			return (0);
		}
		/* must go OTW; falls through to the final return (1) */
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);

	} else if (dt != OPEN_DELEGATE_NONE) {
		/*
		 * Even if there isn't an open_stream yet, we may still be
		 * able to bypass the otw open if the client owns a delegation.
		 *
		 * If you are asking for WRITE, but I only have
		 * a read delegation, then you still have to go otw.
		 */

		if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
			return (1);

		/*
		 * TODO - evaluate the nfsace4
		 */

		/*
		 * Check the access flags to make sure the caller
		 * had permission.
		 */
		if (flag & FREAD && !(acc & VREAD))
			return (1);

		if (flag & FWRITE && !(acc & VWRITE))
			return (1);

		/*
		 * create_open_stream will add a reference to oop,
		 * this will prevent the open_owner_rele done in
		 * nfs4open_otw from destroying the open_owner.
		 */

		/* returns with 'os_sync_lock' held */
		osp = create_open_stream(oop, rp);
		if (osp == NULL)
			return (1);

		/* the new open stream rides on the delegation stateid */
		osp->open_stateid = rp->r_deleg_stateid;
		osp->os_delegation = 1;

		if (flag & FREAD)
			osp->os_share_acc_read++;
		if (flag & FWRITE)
			osp->os_share_acc_write++;

		osp->os_share_deny_none++;
		mutex_exit(&osp->os_sync_lock);

		open_stream_rele(osp, rp);

		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		ASSERT(rsp != NULL);
		if (rsp->rs_sp != NULL) {
			mutex_enter(&rsp->rs_sp->s_lock);
			nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
			    VTOMI4(vp));
			mutex_exit(&rsp->rs_sp->s_lock);
		}
#ifdef	DEBUG
		bypass_otw[1]++;
#endif

		*errorp = 0;
		return (0);
	}

	/* no usable open stream and no delegation shortcut: go OTW */
	return (1);
}
   1159 
   1160 static open_delegation_type4
   1161 get_dtype(rnode4_t *rp)
   1162 {
   1163 	open_delegation_type4 dt;
   1164 
   1165 	mutex_enter(&rp->r_statev4_lock);
   1166 	ASSERT(!rp->r_deleg_return_inprog);
   1167 	if (rp->r_deleg_return_pending)
   1168 		dt = OPEN_DELEGATE_NONE;
   1169 	else
   1170 		dt = rp->r_deleg_type;
   1171 	mutex_exit(&rp->r_statev4_lock);
   1172 
   1173 	return (dt);
   1174 }
   1175 
   1176 /*
   1177  * Fill in *locker with the lock state arguments for a LOCK call.  If
   1178  * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
   1179  * Caller must already hold the necessary seqid sync lock(s).
   1180  */
   1181 
   1182 void
   1183 nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
   1184 	nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
   1185 {
   1186 	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
   1187 	if (lop->lo_just_created == NFS4_JUST_CREATED) {
   1188 		/* this is a new lock request */
   1189 		open_to_lock_owner4 *nown;
   1190 
   1191 		ASSERT(oop != NULL);
   1192 		ASSERT(osp != NULL);
   1193 
   1194 		locker->new_lock_owner = TRUE;
   1195 		nown = &locker->locker4_u.open_owner;
   1196 		nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
   1197 		mutex_enter(&osp->os_sync_lock);
   1198 		nown->open_stateid = osp->open_stateid;
   1199 		mutex_exit(&osp->os_sync_lock);
   1200 		nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */
   1201 
   1202 		nown->lock_owner.clientid = clientid;
   1203 		nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
   1204 		nown->lock_owner.owner_val =
   1205 		    kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
   1206 		bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
   1207 		    nown->lock_owner.owner_len);
   1208 	} else {
   1209 		exist_lock_owner4 *eown;
   1210 		/* have an existing lock owner */
   1211 
   1212 		locker->new_lock_owner = FALSE;
   1213 		eown = &locker->locker4_u.lock_owner;
   1214 		mutex_enter(&lop->lo_lock);
   1215 		eown->lock_stateid = lop->lock_stateid;
   1216 		mutex_exit(&lop->lo_lock);
   1217 		eown->lock_seqid = lop->lock_seqid + 1;
   1218 	}
   1219 }
   1220 
   1221 /*
   1222  * This starts our use of the lock owner's lock seqid by setting
   1223  * the lo_flags to NFS4_LOCK_SEQID_INUSE.  We will wait (forever)
   1224  * with a cv_wait() until we are woken up.
   1225  *
   1226  * Return values:
   1227  * 0		no problems
   1228  * EAGAIN	caller should retry (like a recovery retry)
   1229  */
   1230 int
   1231 nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
   1232 {
   1233 	int error = 0;
   1234 #ifdef DEBUG
   1235 	static int ops = 0;		/* fault injection */
   1236 #endif
   1237 
   1238 #ifdef DEBUG
   1239 	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
   1240 	    ++ops % 7 == 0)
   1241 		return (EAGAIN);
   1242 #endif
   1243 
   1244 	mutex_enter(&mi->mi_lock);
   1245 	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
   1246 	    curthread != mi->mi_recovthread)
   1247 		error = EAGAIN;
   1248 	mutex_exit(&mi->mi_lock);
   1249 	if (error != 0)
   1250 		goto done;
   1251 
   1252 	mutex_enter(&lop->lo_lock);
   1253 
   1254 	ASSERT(lop->lo_seqid_holder != curthread);
   1255 	while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
   1256 		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
   1257 		    "nfs4_start_lock_seqid_sync: waiting on cv"));
   1258 
   1259 		cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
   1260 	}
   1261 	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
   1262 	    "NFS4_LOCK_SEQID_INUSE"));
   1263 
   1264 	lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
   1265 	lop->lo_seqid_holder = curthread;
   1266 	mutex_exit(&lop->lo_lock);
   1267 
   1268 	mutex_enter(&mi->mi_lock);
   1269 	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
   1270 	    curthread != mi->mi_recovthread)
   1271 		error = EAGAIN;
   1272 	mutex_exit(&mi->mi_lock);
   1273 
   1274 	if (error == EAGAIN)
   1275 		nfs4_end_lock_seqid_sync(lop);
   1276 
   1277 	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
   1278 	    "nfs4_start_lock_seqid_sync: error=%d", error));
   1279 
   1280 done:
   1281 	return (error);
   1282 }
   1283 
/*
 * This ends our use of the lock owner's lock seqid by clearing
 * NFS4_LOCK_SEQID_INUSE and lo_seqid_holder, and issuing a cv_broadcast
 * to wake up every thread waiting to use the lock seqid.
 *
 * Must be called by the thread that started the sync (asserted against
 * lo_seqid_holder).  lo_lock is taken and dropped internally.
 */
void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	ASSERT(lop->lo_seqid_holder == curthread);
	lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = NULL;
	/* waiters re-check the flag under lo_lock after waking */
	cv_broadcast(&lop->lo_cv_seqid_sync);
	mutex_exit(&lop->lo_lock);
}
   1300 
   1301 /*
   1302  * Returns a reference to a lock owner via lopp, which has its lock seqid
   1303  * synchronization started.
   1304  * If the lock owner is in the 'just_created' state, then we return its open
   1305  * owner and open stream and start the open seqid synchronization.
   1306  *
   1307  * Return value:
   1308  * NFS4_OK		no problems
   1309  * NFS4ERR_DELAY	there is lost state to recover; caller should retry
   1310  * NFS4ERR_IO		no open stream
   1311  */
   1312 nfsstat4
   1313 nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
   1314 	nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
   1315 	nfs4_lock_owner_t **lopp)
   1316 {
   1317 	nfs4_lock_owner_t *lop, *next_lop;
   1318 	mntinfo4_t *mi;
   1319 	int error = 0;
   1320 	nfsstat4 stat;
   1321 
   1322 	mi = VTOMI4(RTOV4(rp));
   1323 
   1324 	mutex_enter(&rp->r_statev4_lock);
   1325 
   1326 	lop = rp->r_lo_head.lo_next_rnode;
   1327 	while (lop != &rp->r_lo_head) {
   1328 		mutex_enter(&lop->lo_lock);
   1329 		if (lop->lo_pid == pid && lop->lo_valid != 0) {
   1330 			/* Found a matching lock owner */
   1331 			NFS4_DEBUG(nfs4_client_state_debug,
   1332 			    (CE_NOTE, "nfs4_find_or_create_lock_owner: "
   1333 			    "got a match"));
   1334 			lop->lo_ref_count++;
   1335 			break;
   1336 		}
   1337 		next_lop = lop->lo_next_rnode;
   1338 		mutex_exit(&lop->lo_lock);
   1339 		lop = next_lop;
   1340 	}
   1341 
   1342 	if (lop == &rp->r_lo_head) {
   1343 		/* create temporary lock owner */
   1344 		lop = create_lock_owner(rp, pid);
   1345 	}
   1346 	mutex_exit(&rp->r_statev4_lock);
   1347 
   1348 	/* Have a locked down lock owner struct now */
   1349 	if (lop->lo_just_created != NFS4_JUST_CREATED) {
   1350 		/* This is an existing lock owner */
   1351 		*oopp = NULL;
   1352 		*ospp = NULL;
   1353 	} else {
   1354 		/* Lock owner doesn't exist yet */
   1355 
   1356 		/* First grab open owner seqid synchronization */
   1357 		mutex_exit(&lop->lo_lock);
   1358 		*oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
   1359 		if (*oopp == NULL)
   1360 			goto kill_new_lop;
   1361 		error = nfs4_start_open_seqid_sync(*oopp, mi);
   1362 		if (error == EAGAIN) {
   1363 			stat = NFS4ERR_DELAY;
   1364 			goto failed;
   1365 		}
   1366 		*ospp = find_open_stream(*oopp, rp);
   1367 		if (*ospp == NULL) {
   1368 			nfs4_end_open_seqid_sync(*oopp);
   1369 			goto kill_new_lop;
   1370 		}
   1371 		if ((*ospp)->os_failed_reopen) {
   1372 			mutex_exit(&(*ospp)->os_sync_lock);
   1373 			NFS4_DEBUG((nfs4_open_stream_debug ||
   1374 			    nfs4_client_lock_debug), (CE_NOTE,
   1375 			    "nfs4_find_or_create_lock_owner: os_failed_reopen;"
   1376 			    "osp %p, cr %p, rp %s", (void *)(*ospp),
   1377 			    (void *)cr, rnode4info(rp)));
   1378 			nfs4_end_open_seqid_sync(*oopp);
   1379 			stat = NFS4ERR_IO;
   1380 			goto failed;
   1381 		}
   1382 		mutex_exit(&(*ospp)->os_sync_lock);
   1383 
   1384 		/*
   1385 		 * Now see if the lock owner has become permanent while we
   1386 		 * had released our lock.
   1387 		 */
   1388 		mutex_enter(&lop->lo_lock);
   1389 		if (lop->lo_just_created != NFS4_JUST_CREATED) {
   1390 			nfs4_end_open_seqid_sync(*oopp);
   1391 			open_stream_rele(*ospp, rp);
   1392 			open_owner_rele(*oopp);
   1393 			*oopp = NULL;
   1394 			*ospp = NULL;
   1395 		}
   1396 	}
   1397 	mutex_exit(&lop->lo_lock);
   1398 
   1399 	error = nfs4_start_lock_seqid_sync(lop, mi);
   1400 	if (error == EAGAIN) {
   1401 		if (*oopp != NULL)
   1402 			nfs4_end_open_seqid_sync(*oopp);
   1403 		stat = NFS4ERR_DELAY;
   1404 		goto failed;
   1405 	}
   1406 	ASSERT(error == 0);
   1407 
   1408 	*lopp = lop;
   1409 	return (NFS4_OK);
   1410 
   1411 kill_new_lop:
   1412 	/*
   1413 	 * A previous CLOSE was attempted but got EINTR, but the application
   1414 	 * continued to use the unspecified state file descriptor.  But now the
   1415 	 * open stream is gone (which could also destroy the open owner), hence
   1416 	 * we can no longer continue.  The calling function should return EIO
   1417 	 * to the application.
   1418 	 */
   1419 	NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
   1420 	    (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
   1421 	    "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
   1422 	    (void *)(*ospp)));
   1423 
   1424 	nfs4_rnode_remove_lock_owner(rp, lop);
   1425 	stat = NFS4ERR_IO;
   1426 
   1427 failed:
   1428 	lock_owner_rele(lop);
   1429 	if (*oopp) {
   1430 		open_owner_rele(*oopp);
   1431 		*oopp = NULL;
   1432 	}
   1433 	if (*ospp) {
   1434 		open_stream_rele(*ospp, rp);
   1435 		*ospp = NULL;
   1436 	}
   1437 	return (stat);
   1438 }
   1439 
   1440 /*
   1441  * This function grabs a recently freed open owner off of the freed open
   1442  * owner list if there is a match on the cred 'cr'.  It returns NULL if no
   1443  * such match is found.  It will set the 'oo_ref_count' and 'oo_valid' back
   1444  * to both 1 (sane values) in the case a match is found.
   1445  */
   1446 static nfs4_open_owner_t *
   1447 find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
   1448 	mntinfo4_t *mi)
   1449 {
   1450 	nfs4_open_owner_t		*foop;
   1451 
   1452 	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
   1453 	    "find_freed_open_owner: cred %p", (void*)cr));
   1454 
   1455 	ASSERT(mutex_owned(&mi->mi_lock));
   1456 	ASSERT(mutex_owned(&bucketp->b_lock));
   1457 
   1458 	/* got hash bucket, search through freed open owners */
   1459 	for (foop = list_head(&mi->mi_foo_list); foop != NULL;
   1460 	    foop = list_next(&mi->mi_foo_list, foop)) {
   1461 		if (!crcmp(foop->oo_cred, cr)) {
   1462 			NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
   1463 			    "find_freed_open_owner: got a match open owner "
   1464 			    "%p", (void *)foop));
   1465 			foop->oo_ref_count = 1;
   1466 			foop->oo_valid = 1;
   1467 			list_remove(&mi->mi_foo_list, foop);
   1468 			mi->mi_foo_num--;
   1469 
   1470 			/* now add the struct into the cred hash table */
   1471 			list_insert_head(&bucketp->b_oo_hash_list, foop);
   1472 			return (foop);
   1473 		}
   1474 	}
   1475 
   1476 	return (NULL);
   1477 }
   1478 
   1479 /*
   1480  * Insert the newly freed 'oop' into the mi's freed oop list,
   1481  * always at the head of the list.  If we've already reached
   1482  * our maximum allowed number of freed open owners (mi_foo_max),
   1483  * then remove the LRU open owner on the list (namely the tail).
   1484  */
   1485 static void
   1486 nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
   1487 {
   1488 	nfs4_open_owner_t *lru_foop;
   1489 
   1490 	if (mi->mi_foo_num < mi->mi_foo_max) {
   1491 		NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
   1492 		    "nfs4_free_open_owner: num free %d, max free %d, "
   1493 		    "insert open owner %p for mntinfo4 %p",
   1494 		    mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
   1495 		    (void *)mi));
   1496 		list_insert_head(&mi->mi_foo_list, oop);
   1497 		mi->mi_foo_num++;
   1498 		return;
   1499 	}
   1500 
   1501 	/* need to replace a freed open owner */
   1502 
   1503 	lru_foop = list_tail(&mi->mi_foo_list);
   1504 
   1505 	NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
   1506 	    "nfs4_free_open_owner: destroy %p, insert %p",
   1507 	    (void *)lru_foop, (void *)oop));
   1508 
   1509 	list_remove(&mi->mi_foo_list, lru_foop);
   1510 	nfs4_destroy_open_owner(lru_foop);
   1511 
   1512 	/* head always has latest freed oop */
   1513 	list_insert_head(&mi->mi_foo_list, oop);
   1514 }
   1515 
   1516 void
   1517 nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
   1518 {
   1519 	ASSERT(oop != NULL);
   1520 
   1521 	crfree(oop->oo_cred);
   1522 	if (oop->oo_cred_otw)
   1523 		crfree(oop->oo_cred_otw);
   1524 	mutex_destroy(&oop->oo_lock);
   1525 	cv_destroy(&oop->oo_cv_seqid_sync);
   1526 	kmem_free(oop, sizeof (*oop));
   1527 }
   1528 
   1529 seqid4
   1530 nfs4_get_open_seqid(nfs4_open_owner_t *oop)
   1531 {
   1532 	ASSERT(oop->oo_seqid_inuse);
   1533 	return (oop->oo_seqid);
   1534 }
   1535 
   1536 /*
   1537  * This set's the open seqid for a <open owner/ mntinfo4> pair.
   1538  */
   1539 void
   1540 nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
   1541 	nfs4_tag_type_t tag_type)
   1542 {
   1543 	ASSERT(oop->oo_seqid_inuse);
   1544 	oop->oo_seqid = seqid;
   1545 	oop->oo_last_good_seqid = seqid;
   1546 	oop->oo_last_good_op = tag_type;
   1547 }
   1548 
   1549 /*
   1550  * This bumps the current open seqid for the open owner 'oop'.
   1551  */
   1552 void
   1553 nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
   1554     nfs4_tag_type_t tag_type)
   1555 {
   1556 	ASSERT(oop->oo_seqid_inuse);
   1557 	oop->oo_seqid++;
   1558 	oop->oo_last_good_seqid = oop->oo_seqid;
   1559 	oop->oo_last_good_op = tag_type;
   1560 }
   1561 
   1562 /*
   1563  * If no open owner was provided, this function takes the cred to find an
   1564  * open owner within the given mntinfo4_t.  Either way we return the
   1565  * open owner's OTW credential if it exists; otherwise returns the
   1566  * supplied 'cr'.
   1567  *
   1568  * A hold is put on the returned credential, and it is up to the caller
   1569  * to free the cred.
   1570  */
   1571 cred_t *
   1572 nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
   1573 {
   1574 	cred_t *ret_cr;
   1575 	nfs4_open_owner_t *oop = provided_oop;
   1576 
   1577 	if (oop == NULL)
   1578 		oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
   1579 	if (oop != NULL) {
   1580 		mutex_enter(&oop->oo_lock);
   1581 		if (oop->oo_cred_otw)
   1582 			ret_cr = oop->oo_cred_otw;
   1583 		else
   1584 			ret_cr = cr;
   1585 		crhold(ret_cr);
   1586 		mutex_exit(&oop->oo_lock);
   1587 		if (provided_oop == NULL)
   1588 			open_owner_rele(oop);
   1589 	} else {
   1590 		ret_cr = cr;
   1591 		crhold(ret_cr);
   1592 	}
   1593 	return (ret_cr);
   1594 }
   1595 
/*
 * Retrieves the next open stream in the rnode's list if an open stream
 * is provided; otherwise gets the first open stream in the list.
 * The open owner for that open stream is then retrieved, and if its
 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
 * is returned.  *osp is set to the 'found' open stream.
 *
 * Note: we don't set *osp to the open stream retrieved via the
 * optimized check since that won't necessarily be at the beginning
 * of the rnode list, and if that osp doesn't work we'd like to
 * check _all_ open streams (starting from the beginning of the
 * rnode list).
 *
 * This is an iterator: the caller passes the same *osp, *first_time and
 * *last_time back in on each call.
 *   *first_time	in: TRUE requests the optimized lookup by 'cr';
 *			out: always FALSE.
 *   *osp		in: previously returned open stream (its reference
 *			is released here) or NULL to start at the list head;
 *			out: next valid open stream, with a reference held,
 *			or NULL.
 *   *last_time	set to TRUE when no further open stream was found;
 *			otherwise left untouched.
 *
 * A hold is always placed on the returned cred.
 */
cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
	nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
{
	nfs4_open_stream_t *next_osp = NULL;
	cred_t *ret_cr;

	ASSERT(cr != NULL);
	/*
	 * As an optimization, try to find the open owner
	 * for the cred provided since that's most likely
	 * to work.
	 */
	if (*first_time) {
		nfs4_open_owner_t *oop;

		oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
		if (oop) {
			/* a found stream comes back with os_sync_lock held */
			next_osp = find_open_stream(oop, rp);
			if (next_osp)
				mutex_exit(&next_osp->os_sync_lock);
			open_owner_rele(oop);
		}
	}
	if (next_osp == NULL) {
		int delay_rele = 0;
		/* optimized lookup not used or failed; walk the list */
		*first_time = FALSE;

		/* return the next open stream for this rnode */
		mutex_enter(&rp->r_os_lock);
		/* Now, no one can add or delete to rp's open streams list */

		if (*osp) {
			next_osp = list_next(&rp->r_open_streams, *osp);
			/*
			 * Delay the rele of *osp until after we drop
			 * r_os_lock to not deadlock with oo_lock
			 * via an open_stream_rele()->open_owner_rele().
			 */
			delay_rele = 1;
		} else {
			next_osp = list_head(&rp->r_open_streams);
		}
		if (next_osp) {
			nfs4_open_stream_t *tmp_osp;

			/* find the next valid open stream */
			mutex_enter(&next_osp->os_sync_lock);
			while (next_osp && !next_osp->os_valid) {
				tmp_osp =
				    list_next(&rp->r_open_streams, next_osp);
				mutex_exit(&next_osp->os_sync_lock);
				next_osp = tmp_osp;
				if (next_osp)
					mutex_enter(&next_osp->os_sync_lock);
			}
			if (next_osp) {
				/* hold it for the caller, then unlock */
				next_osp->os_ref_count++;
				mutex_exit(&next_osp->os_sync_lock);
			}
		}
		mutex_exit(&rp->r_os_lock);
		if (delay_rele)
			open_stream_rele(*osp, rp);
	}

	if (next_osp) {
		nfs4_open_owner_t *oop;

		oop = next_osp->os_open_owner;
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (*first_time) {
			/*
			 * Stream came from the optimized lookup: drop its
			 * reference and don't hand it back, so the next call
			 * starts from the head of the list.
			 */
			open_stream_rele(next_osp, rp);
			*osp = NULL;
		} else
			*osp = next_osp;
	} else {
		/* just return the cred provided to us */
		*last_time = TRUE;
		*osp = NULL;
		ret_cr = cr;
		crhold(ret_cr);
	}

	*first_time = FALSE;
	return (ret_cr);
}
   1702 
   1703 void
   1704 nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
   1705 {
   1706 	bzero(&sid_tp->d_sid, sizeof (stateid4));
   1707 	bzero(&sid_tp->l_sid, sizeof (stateid4));
   1708 	bzero(&sid_tp->o_sid, sizeof (stateid4));
   1709 	sid_tp->cur_sid_type = NO_SID;
   1710 }
   1711 
   1712 void
   1713 nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
   1714 {
   1715 	NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
   1716 	    "nfs4_save_stateid: saved %s stateid",
   1717 	    sid_tp->cur_sid_type == DEL_SID ? "delegation" :
   1718 	    sid_tp->cur_sid_type == LOCK_SID ? "lock" :
   1719 	    sid_tp->cur_sid_type == OPEN_SID ? "open" : "special"));
   1720 
   1721 	switch (sid_tp->cur_sid_type) {
   1722 	case DEL_SID:
   1723 		sid_tp->d_sid = *s1;
   1724 		break;
   1725 	case LOCK_SID:
   1726 		sid_tp->l_sid = *s1;
   1727 		break;
   1728 	case OPEN_SID:
   1729 		sid_tp->o_sid = *s1;
   1730 		break;
   1731 	case SPEC_SID:
   1732 	default:
   1733 		cmn_err(CE_PANIC, "nfs4_save_stateid: illegal "
   1734 		    "stateid type %d", sid_tp->cur_sid_type);
   1735 	}
   1736 }
   1737 
   1738 /*
   1739  * We got NFS4ERR_BAD_SEQID.  Setup some arguments to pass to recovery.
   1740  * Caller is responsible for freeing.
   1741  */
   1742 nfs4_bseqid_entry_t *
   1743 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop,
   1744     vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid)
   1745 {
   1746 	nfs4_bseqid_entry_t	*bsep;
   1747 
   1748 	bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP);
   1749 	bsep->bs_oop = oop;
   1750 	bsep->bs_lop = lop;
   1751 	bsep->bs_vp = vp;
   1752 	bsep->bs_pid = pid;
   1753 	bsep->bs_tag = tag;
   1754 	bsep->bs_seqid = seqid;
   1755 
   1756 	return (bsep);
   1757 }
   1758 
   1759 void
   1760 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
   1761 	nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr,
   1762 	vnode_t *vp, int access_close, int deny_close)
   1763 {
   1764 	lost_rqstp->lr_putfirst = FALSE;
   1765 
   1766 	ASSERT(vp != NULL);
   1767 	if (error == ETIMEDOUT || error == EINTR ||
   1768 	    NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
   1769 		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   1770 		    "nfs4open_dg_save_lost_rqst: error %d", error));
   1771 
   1772 		lost_rqstp->lr_op = OP_OPEN_DOWNGRADE;
   1773 		/*
   1774 		 * The vp is held and rele'd via the recovery code.
   1775 		 * See nfs4_save_lost_rqst.
   1776 		 */
   1777 		lost_rqstp->lr_vp = vp;
   1778 		lost_rqstp->lr_dvp = NULL;
   1779 		lost_rqstp->lr_oop = oop;
   1780 		lost_rqstp->lr_osp = osp;
   1781 		lost_rqstp->lr_lop = NULL;
   1782 		lost_rqstp->lr_cr = cr;
   1783 		lost_rqstp->lr_flk = NULL;
   1784 		lost_rqstp->lr_dg_acc = access_close;
   1785 		lost_rqstp->lr_dg_deny = deny_close;
   1786 		lost_rqstp->lr_putfirst = FALSE;
   1787 	} else {
   1788 		lost_rqstp->lr_op = 0;
   1789 	}
   1790 }
   1791 
   1792 /*
   1793  * Change the access and deny bits of an OPEN.
   1794  * If recovery is needed, *recov_credpp is set to the cred used OTW,
   1795  * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW.
   1796  */
   1797 void
   1798 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop,
   1799 	nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp,
   1800 	nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp)
   1801 {
   1802 	mntinfo4_t		*mi;
   1803 	int			downgrade_acc, downgrade_deny;
   1804 	int			new_acc, new_deny;
   1805 	COMPOUND4args_clnt	args;
   1806 	COMPOUND4res_clnt	res;
   1807 	OPEN_DOWNGRADE4res	*odg_res;
   1808 	nfs_argop4		argop[3];
   1809 	nfs_resop4		*resop;
   1810 	rnode4_t		*rp;
   1811 	bool_t			needrecov = FALSE;
   1812 	int			doqueue = 1;
   1813 	seqid4			seqid = 0;
   1814 	cred_t			*cred_otw;
   1815 	hrtime_t		t;
   1816 
   1817 	ASSERT(mutex_owned(&osp->os_sync_lock));
   1818 #if DEBUG
   1819 	mutex_enter(&oop->oo_lock);
   1820 	ASSERT(oop->oo_seqid_inuse);
   1821 	mutex_exit(&oop->oo_lock);
   1822 #endif
   1823 
   1824 
   1825 	if (access_close == 0 && deny_close == 0) {
   1826 		nfs4_error_zinit(ep);
   1827 		return;
   1828 	}
   1829 
   1830 	cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop);
   1831 
   1832 cred_retry:
   1833 	nfs4_error_zinit(ep);
   1834 	downgrade_acc = 0;
   1835 	downgrade_deny = 0;
   1836 	mi = VTOMI4(vp);
   1837 	rp = VTOR4(vp);
   1838 
   1839 	/*
   1840 	 * Check to see if the open stream got closed before we go OTW,
   1841 	 * now that we have acquired the 'os_sync_lock'.
   1842 	 */
   1843 	if (!osp->os_valid) {
   1844 		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
   1845 		    " open stream has already been closed, return success"));
   1846 		/* error has already been set */
   1847 		goto no_args_out;
   1848 	}
   1849 
   1850 	/* If the file failed recovery, just quit. */
   1851 	mutex_enter(&rp->r_statelock);
   1852 	if (rp->r_flags & R4RECOVERR) {
   1853 		mutex_exit(&rp->r_statelock);
   1854 		ep->error = EIO;
   1855 		goto no_args_out;
   1856 	}
   1857 	mutex_exit(&rp->r_statelock);
   1858 
   1859 	seqid = nfs4_get_open_seqid(oop) + 1;
   1860 
   1861 	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
   1862 	    "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
   1863 	    access_close, osp->os_share_acc_read, osp->os_share_acc_write));
   1864 
   1865 	/* If we're closing the last READ, need to downgrade */
   1866 	if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
   1867 		downgrade_acc |= OPEN4_SHARE_ACCESS_READ;
   1868 
   1869 	/* if we're closing the last WRITE, need to downgrade */
   1870 	if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
   1871 		downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;
   1872 
   1873 	downgrade_deny = OPEN4_SHARE_DENY_NONE;
   1874 
   1875 	new_acc = 0;
   1876 	new_deny = 0;
   1877 
   1878 	/* set our new access and deny share bits */
   1879 	if ((osp->os_share_acc_read > 0) &&
   1880 	    !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
   1881 		new_acc |= OPEN4_SHARE_ACCESS_READ;
   1882 	if ((osp->os_share_acc_write > 0) &&
   1883 	    !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
   1884 		new_acc |= OPEN4_SHARE_ACCESS_WRITE;
   1885 
   1886 	new_deny = OPEN4_SHARE_DENY_NONE;
   1887 
   1888 	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
   1889 	    "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
   1890 	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
   1891 	    "new acc 0x%x deny 0x%x", new_acc, new_deny));
   1892 
   1893 	/*
   1894 	 * Check to see if we aren't actually doing any downgrade or
   1895 	 * if this is the last 'close' but the file is still mmapped.
   1896 	 * Skip this if this a lost request resend so we don't decrement
   1897 	 * the osp's share counts more than once.
   1898 	 */
   1899 	if (!lrp &&
   1900 	    ((downgrade_acc == 0 && downgrade_deny == 0) ||
   1901 	    (new_acc == 0 && new_deny == 0))) {
   1902 		/*
   1903 		 * No downgrade to do, but still need to
   1904 		 * update osp's os_share_* counts.
   1905 		 */
   1906 		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
   1907 		    "nfs4_open_downgrade: just lower the osp's count by %s",
   1908 		    (access_close & FREAD) && (access_close & FWRITE) ?
   1909 		    "read and write" : (access_close & FREAD) ? "read" :
   1910 		    (access_close & FWRITE) ? "write" : "bogus"));
   1911 		if (access_close & FREAD)
   1912 			osp->os_share_acc_read--;
   1913 		if (access_close & FWRITE)
   1914 			osp->os_share_acc_write--;
   1915 		osp->os_share_deny_none--;
   1916 		nfs4_error_zinit(ep);
   1917 
   1918 		goto no_args_out;
   1919 	}
   1920 
   1921 	if (osp->os_orig_oo_name != oop->oo_name) {
   1922 		ep->error = EIO;
   1923 		goto no_args_out;
   1924 	}
   1925 
   1926 	/* setup the COMPOUND args */
   1927 	if (lrp)
   1928 		args.ctag = TAG_OPEN_DG_LOST;
   1929 	else
   1930 		args.ctag = TAG_OPEN_DG;
   1931 
   1932 	args.array_len = 3;
   1933 	args.array = argop;
   1934 
   1935 	/* putfh */
   1936 	argop[0].argop = OP_CPUTFH;
   1937 	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
   1938 
   1939 	argop[1].argop = OP_GETATTR;
   1940 	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
   1941 	argop[1].nfs_argop4_u.opgetattr.mi = mi;
   1942 
   1943 	ASSERT(mutex_owned(&osp->os_sync_lock));
   1944 	ASSERT(osp->os_delegation == FALSE);
   1945 
   1946 	/* open downgrade */
   1947 	argop[2].argop = OP_OPEN_DOWNGRADE;
   1948 	argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
   1949 	argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
   1950 	argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
   1951 	argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;
   1952 
   1953 	t = gethrtime();
   1954 
   1955 	rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);
   1956 
   1957 	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
   1958 		nfs4_set_open_seqid(seqid, oop, args.ctag);
   1959 
   1960 	if ((ep->error == EACCES ||
   1961 	    (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
   1962 	    cred_otw != cr) {
   1963 		crfree(cred_otw);
   1964 		cred_otw = cr;
   1965 		crhold(cred_otw);
   1966 		if (!ep->error)
   1967 			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
   1968 		goto cred_retry;
   1969 	}
   1970 
   1971 	needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);
   1972 
   1973 	if (needrecov && recov_credpp) {
   1974 		*recov_credpp = cred_otw;
   1975 		crhold(*recov_credpp);
   1976 		if (recov_seqidp)
   1977 			*recov_seqidp = seqid;
   1978 	}
   1979 
   1980 	if (!ep->error && !res.status) {
   1981 		/* get the open downgrade results */
   1982 		resop = &res.array[2];
   1983 		odg_res = &resop->nfs_resop4_u.opopen_downgrade;
   1984 
   1985 		osp->open_stateid = odg_res->open_stateid;
   1986 
   1987 		/* set the open streams new access/deny bits */
   1988 		if (access_close & FREAD)
   1989 			osp->os_share_acc_read--;
   1990 		if (access_close & FWRITE)
   1991 			osp->os_share_acc_write--;
   1992 		osp->os_share_deny_none--;
   1993 		osp->os_dc_openacc = new_acc;
   1994 
   1995 		nfs4_attr_cache(vp,
   1996 		    &res.array[1].nfs_resop4_u.opgetattr.ga_res,
   1997 		    t, cred_otw, TRUE, NULL);
   1998 	}
   1999 
   2000 	if (!ep->error)
   2001 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
   2002 
   2003 no_args_out:
   2004 	crfree(cred_otw);
   2005 }
   2006 
   2007 /*
   2008  * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
   2009  * because the filesystem was forcibly unmounted) then we don't know if we
   2010  * potentially left state dangling on the server, therefore the recovery
   2011  * framework makes this call to resend the OPEN request and then undo it.
   2012  */
   2013 void
   2014 nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
   2015 	nfs4_error_t *ep)
   2016 {
   2017 	COMPOUND4args_clnt	args;
   2018 	COMPOUND4res_clnt	res;
   2019 	nfs_argop4		argop[4];
   2020 	GETFH4res		*gf_res = NULL;
   2021 	OPEN4cargs		*open_args;
   2022 	OPEN4res		*op_res;
   2023 	char			*destcfp;
   2024 	int			destclen;
   2025 	nfs4_ga_res_t		*garp;
   2026 	vnode_t			*dvp = NULL, *vp = NULL;
   2027 	rnode4_t		*rp = NULL, *drp = NULL;
   2028 	cred_t			*cr = NULL;
   2029 	seqid4			seqid;
   2030 	nfs4_open_owner_t	*oop = NULL;
   2031 	nfs4_open_stream_t	*osp = NULL;
   2032 	component4		*srcfp;
   2033 	open_claim_type4	claim;
   2034 	mntinfo4_t		*mi;
   2035 	int			doqueue = 1;
   2036 	bool_t			retry_open = FALSE;
   2037 	int			created_osp = 0;
   2038 	hrtime_t		t;
   2039 	char 			*failed_msg = "";
   2040 	int			fh_different;
   2041 	int			reopen = 0;
   2042 
   2043 	nfs4_error_zinit(ep);
   2044 
   2045 	cr = resend_rqstp->lr_cr;
   2046 	dvp = resend_rqstp->lr_dvp;
   2047 
   2048 	vp = *vpp;
   2049 	if (vp) {
   2050 		ASSERT(nfs4_consistent_type(vp));
   2051 		rp = VTOR4(vp);
   2052 	}
   2053 
   2054 	if (rp) {
   2055 		/* If the file failed recovery, just quit. */
   2056 		mutex_enter(&rp->r_statelock);
   2057 		if (rp->r_flags & R4RECOVERR) {
   2058 			mutex_exit(&rp->r_statelock);
   2059 			ep->error = EIO;
   2060 			return;
   2061 		}
   2062 		mutex_exit(&rp->r_statelock);
   2063 	}
   2064 
   2065 	if (dvp) {
   2066 		drp = VTOR4(dvp);
   2067 		/* If the parent directory failed recovery, just quit. */
   2068 		mutex_enter(&drp->r_statelock);
   2069 		if (drp->r_flags & R4RECOVERR) {
   2070 			mutex_exit(&drp->r_statelock);
   2071 			ep->error = EIO;
   2072 			return;
   2073 		}
   2074 		mutex_exit(&drp->r_statelock);
   2075 	} else
   2076 		reopen = 1;	/* NULL dvp means this is a reopen */
   2077 
   2078 	claim = resend_rqstp->lr_oclaim;
   2079 	ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);
   2080 
   2081 	args.ctag = TAG_OPEN_LOST;
   2082 	args.array_len = 4;
   2083 	args.array = argop;
   2084 
   2085 	argop[0].argop = OP_CPUTFH;
   2086 	if (reopen) {
   2087 		ASSERT(vp != NULL);
   2088 
   2089 		mi = VTOMI4(vp);
   2090 		/*
   2091 		 * if this is a file mount then
   2092 		 * use the mntinfo parentfh
   2093 		 */
   2094 		argop[0].nfs_argop4_u.opcputfh.sfh =
   2095 		    (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
   2096 		    VTOSV(vp)->sv_dfh;
   2097 		args.ctag = TAG_REOPEN_LOST;
   2098 	} else {
   2099 		argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
   2100 		mi = VTOMI4(dvp);
   2101 	}
   2102 
   2103 	argop[1].argop = OP_COPEN;
   2104 	open_args = &argop[1].nfs_argop4_u.opcopen;
   2105 	open_args->claim = claim;
   2106 
   2107 	/*
   2108 	 * If we sent over a OPEN with CREATE then the only
   2109 	 * thing we care about is to not leave dangling state
   2110 	 * on the server, not whether the file we potentially
   2111 	 * created remains on the server.  So even though the
   2112 	 * lost open request specified a CREATE, we only wish
   2113 	 * to do a non-CREATE OPEN.
   2114 	 */
   2115 	open_args->opentype = OPEN4_NOCREATE;
   2116 
   2117 	srcfp = &resend_rqstp->lr_ofile;
   2118 	destclen = srcfp->utf8string_len;
   2119 	destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
   2120 	bcopy(srcfp->utf8string_val, destcfp, destclen);
   2121 	destcfp[destclen] = '\0';
   2122 	if (claim == CLAIM_DELEGATE_CUR) {
   2123 		open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
   2124 		    resend_rqstp->lr_ostateid;
   2125 		open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
   2126 	} else {
   2127 		open_args->open_claim4_u.cfile = destcfp;
   2128 	}
   2129 
   2130 	open_args->share_access = resend_rqstp->lr_oacc;
   2131 	open_args->share_deny = resend_rqstp->lr_odeny;
   2132 	oop = resend_rqstp->lr_oop;
   2133 	ASSERT(oop != NULL);
   2134 
   2135 	open_args->owner.clientid = mi2clientid(mi);
   2136 	/* this length never changes */
   2137 	open_args->owner.owner_len = sizeof (oop->oo_name);
   2138 	open_args->owner.owner_val =
   2139 	    kmem_alloc(open_args->owner.owner_len, KM_SLEEP);
   2140 
   2141 	ep->error = nfs4_start_open_seqid_sync(oop, mi);
   2142 	ASSERT(ep->error == 0);		/* recov thread always succeeds */
   2143 	/*
   2144 	 * We can get away with not saving the seqid upon detection
   2145 	 * of a lost request, and now just use the open owner's current
   2146 	 * seqid since we only allow one op OTW per seqid and lost
   2147 	 * requests are saved FIFO.
   2148 	 */
   2149 	seqid = nfs4_get_open_seqid(oop) + 1;
   2150 	open_args->seqid = seqid;
   2151 
   2152 	bcopy(&oop->oo_name, open_args->owner.owner_val,
   2153 	    open_args->owner.owner_len);
   2154 
   2155 	/* getfh */
   2156 	argop[2].argop = OP_GETFH;
   2157 
   2158 	/* Construct the getattr part of the compound */
   2159 	argop[3].argop = OP_GETATTR;
   2160 	argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
   2161 	argop[3].nfs_argop4_u.opgetattr.mi = mi;
   2162 
   2163 	res.array = NULL;
   2164 
   2165 	t = gethrtime();
   2166 
   2167 	rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
   2168 
   2169 	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
   2170 		nfs4_set_open_seqid(seqid, oop, args.ctag);
   2171 
   2172 	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   2173 	    "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));
   2174 
   2175 	if (ep->error || res.status)
   2176 		goto err_out;
   2177 
   2178 	op_res = &res.array[1].nfs_resop4_u.opopen;
   2179 	gf_res = &res.array[2].nfs_resop4_u.opgetfh;
   2180 	garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;
   2181 
   2182 	if (!vp) {
   2183 		int rnode_err = 0;
   2184 		nfs4_sharedfh_t *sfh;
   2185 
   2186 		/*
   2187 		 * If we can't decode all the attributes they are not usable,
   2188 		 * just make the vnode.
   2189 		 */
   2190 
   2191 		sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
   2192 		*vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
   2193 		    fn_get(VTOSV(dvp)->sv_name,
   2194 		    open_args->open_claim4_u.cfile, sfh));
   2195 		sfh4_rele(&sfh);
   2196 		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   2197 		    "nfs4_resend_open_otw: made vp %p for file %s",
   2198 		    (void *)(*vpp), open_args->open_claim4_u.cfile));
   2199 
   2200 		if (ep->error)
   2201 			PURGE_ATTRCACHE4(*vpp);
   2202 
   2203 		/*
   2204 		 * For the newly created *vpp case, make sure the rnode
   2205 		 * isn't bad before using it.
   2206 		 */
   2207 		mutex_enter(&(VTOR4(*vpp))->r_statelock);
   2208 		if (VTOR4(*vpp)->r_flags & R4RECOVERR)
   2209 			rnode_err = EIO;
   2210 		mutex_exit(&(VTOR4(*vpp))->r_statelock);
   2211 
   2212 		if (rnode_err) {
   2213 			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   2214 			    "nfs4_resend_open_otw: rp %p is bad",
   2215 			    (void *)VTOR4(*vpp)));
   2216 			ep->error = rnode_err;
   2217 			goto err_out;
   2218 		}
   2219 
   2220 		vp = *vpp;
   2221 		rp = VTOR4(vp);
   2222 	}
   2223 
   2224 	if (reopen) {
   2225 		/*
   2226 		 * Check if the path we reopened really is the same
   2227 		 * file. We could end up in a situation were the file
   2228 		 * was removed and a new file created with the same name.
   2229 		 */
   2230 		(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
   2231 		fh_different =
   2232 		    (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
   2233 		if (fh_different) {
   2234 			if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
   2235 			    mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
   2236 				/* Oops, we don't have the same file */
   2237 				if (mi->mi_fh_expire_type == FH4_PERSISTENT)
   2238 					failed_msg =
   2239 					    "Couldn't reopen: Persistant "
   2240 					    "file handle changed";
   2241 				else
   2242 					failed_msg =
   2243 					    "Couldn't reopen: Volatile "
   2244 					    "(no expire on open) file handle "
   2245 					    "changed";
   2246 
   2247 				nfs4_end_open_seqid_sync(oop);
   2248 				kmem_free(destcfp, destclen + 1);
   2249 				nfs4args_copen_free(open_args);
   2250 				(void) xdr_free(xdr_COMPOUND4res_clnt,
   2251 				    (caddr_t)&res);
   2252 				nfs_rw_exit(&mi->mi_fh_lock);
   2253 				nfs4_fail_recov(vp, failed_msg, ep->error,
   2254 				    ep->stat);
   2255 				return;
   2256 			} else {
   2257 				/*
   2258 				 * We have volatile file handles that don't
   2259 				 * compare.  If the fids are the same then we
   2260 				 * assume that the file handle expired but the
   2261 				 * renode still refers to the same file object.
   2262 				 *
   2263 				 * First check that we have fids or not.
   2264 				 * If we don't we have a dumb server so we will
   2265 				 * just assume every thing is ok for now.
   2266 				 */
   2267 				if (!ep->error &&
   2268 				    garp->n4g_va.va_mask & AT_NODEID &&
   2269 				    rp->r_attr.va_mask & AT_NODEID &&
   2270 				    rp->r_attr.va_nodeid !=
   2271 				    garp->n4g_va.va_nodeid) {
   2272 					/*
   2273 					 * We have fids, but they don't
   2274 					 * compare. So kill the file.
   2275 					 */
   2276 					failed_msg =
   2277 					    "Couldn't reopen: file handle "
   2278 					    "changed due to mismatched fids";
   2279 					nfs4_end_open_seqid_sync(oop);
   2280 					kmem_free(destcfp, destclen + 1);
   2281 					nfs4args_copen_free(open_args);
   2282 					(void) xdr_free(xdr_COMPOUND4res_clnt,
   2283 					    (caddr_t)&res);
   2284 					nfs_rw_exit(&mi->mi_fh_lock);
   2285 					nfs4_fail_recov(vp, failed_msg,
   2286 					    ep->error, ep->stat);
   2287 					return;
   2288 				} else {
   2289 					/*
   2290 					 * We have volatile file handles that
   2291 					 * refers to the same file (at least
   2292 					 * they have the same fid) or we don't
   2293 					 * have fids so we can't tell. :(. We'll
   2294 					 * be a kind and accepting client so
   2295 					 * we'll update the rnode's file
   2296 					 * handle with the otw handle.
   2297 					 *
   2298 					 * We need to drop mi->mi_fh_lock since
   2299 					 * sh4_update acquires it. Since there
   2300 					 * is only one recovery thread there is
   2301 					 * no race.
   2302 					 */
   2303 					nfs_rw_exit(&mi->mi_fh_lock);
   2304 					sfh4_update(rp->r_fh, &gf_res->object);
   2305 				}
   2306 			}
   2307 		} else {
   2308 			nfs_rw_exit(&mi->mi_fh_lock);
   2309 		}
   2310 	}
   2311 
   2312 	ASSERT(nfs4_consistent_type(vp));
   2313 
   2314 	if (op_res->rflags & OPEN4_RESULT_CONFIRM)
   2315 		nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
   2316 		    &retry_open, oop, TRUE, ep, NULL);
   2317 	if (ep->error || ep->stat) {
   2318 		nfs4_end_open_seqid_sync(oop);
   2319 		kmem_free(destcfp, destclen + 1);
   2320 		nfs4args_copen_free(open_args);
   2321 		if (!ep->error)
   2322 			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
   2323 		return;
   2324 	}
   2325 
   2326 	if (reopen) {
   2327 		/*
   2328 		 * Doing a reopen here so the osp should already exist.
   2329 		 * If not, something changed or went very wrong.
   2330 		 *
   2331 		 * returns with 'os_sync_lock' held
   2332 		 */
   2333 		osp = find_open_stream(oop, rp);
   2334 		if (!osp) {
   2335 			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   2336 			    "nfs4_resend_open_otw: couldn't find osp"));
   2337 			ep->error = EINVAL;
   2338 			goto err_out;
   2339 		}
   2340 		osp->os_open_ref_count++;
   2341 	} else {
   2342 		mutex_enter(&oop->oo_lock);
   2343 		oop->oo_just_created = NFS4_PERM_CREATED;
   2344 		mutex_exit(&oop->oo_lock);
   2345 
   2346 		/* returns with 'os_sync_lock' held */
   2347 		osp = find_or_create_open_stream(oop, rp, &created_osp);
   2348 		if (!osp) {
   2349 			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
   2350 			    "nfs4_resend_open_otw: couldn't create osp"));
   2351 			ep->error = EINVAL;
   2352 			goto err_out;
   2353 		}
   2354 	}
   2355 
   2356 	osp->open_stateid = op_res->stateid;
   2357 	osp->os_delegation = FALSE;
   2358 	/*
   2359 	 * Need to reset this bitfield for the possible case where we were
   2360 	 * going to OTW CLOSE the file, got a non-recoverable error, and before
   2361 	 * we could retry the CLOSE, OPENed the file again.
   2362 	 */
   2363 	ASSERT(osp->os_open_owner->oo_seqid_inuse);
   2364 	osp->os_final_close = 0;
   2365 	osp->os_force_close = 0;
   2366 
   2367 	if (!reopen) {
   2368 		if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
   2369 			osp->os_share_acc_read++;
   2370 		if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
   2371 			osp->os_share_acc_write++;
   2372 		osp->os_share_deny_none++;
   2373 	}
   2374 
   2375 	mutex_exit(&osp->os_sync_lock);
   2376 	if (created_osp)
   2377 		nfs4_inc_state_ref_count(mi);
   2378 	open_stream_rele(osp, rp);
   2379 
   2380 	nfs4_end_open_seqid_sync(oop);
   2381 
   2382 	/* accept delegation, if any */
   2383 	nfs4_delegation_accept(rp, claim, op_res, garp, cr);
   2384 
   2385 	kmem_free(destcfp, destclen + 1);
   2386 	nfs4args_copen_free(open_args);
   2387 
   2388 	if (claim == CLAIM_DELEGATE_CUR)
   2389 		nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
   2390 	else
   2391 		PURGE_ATTRCACHE4(vp);
   2392 
   2393 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
   2394 
   2395 	ASSERT(nfs4_consistent_type(vp));
   2396 
   2397 	return;
   2398 
   2399 err_out:
   2400 	nfs4_end_open_seqid_sync(oop);
   2401 	kmem_free(destcfp, destclen + 1);
   2402 	nfs4args_copen_free(open_args);
   2403 	if (!ep->error)
   2404 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
   2405 }
   2406