Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/systm.h>
     27 #include <rpc/auth.h>
     28 #include <rpc/clnt.h>
     29 #include <nfs/nfs4_kprot.h>
     30 #include <nfs/nfs4.h>
     31 #include <nfs/lm.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/disp.h>
     34 #include <sys/sdt.h>
     35 
     36 #include <sys/pathname.h>
     37 
     38 #include <sys/strsubr.h>
     39 #include <sys/ddi.h>
     40 
     41 #include <sys/vnode.h>
     42 #include <sys/sdt.h>
     43 #include <inet/common.h>
     44 #include <inet/ip.h>
     45 #include <inet/ip6.h>
     46 
     47 #define	MAX_READ_DELEGATIONS 5
     48 
     49 krwlock_t rfs4_deleg_policy_lock;
     50 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
     51 static int rfs4_deleg_wlp = 5;
     52 kmutex_t rfs4_deleg_lock;
     53 static int rfs4_deleg_disabled;
     54 static int rfs4_max_setup_cb_tries = 5;
     55 
     56 #ifdef DEBUG
     57 
     58 static int rfs4_test_cbgetattr_fail = 0;
     59 int rfs4_cb_null;
     60 int rfs4_cb_debug;
     61 int rfs4_deleg_debug;
     62 
     63 #endif
     64 
     65 static void rfs4_recall_file(rfs4_file_t *,
     66     void (*recall)(rfs4_deleg_state_t *, bool_t),
     67     bool_t, rfs4_client_t *);
     68 static	void		rfs4_revoke_file(rfs4_file_t *);
     69 static	void		rfs4_cb_chflush(rfs4_cbinfo_t *);
     70 static	CLIENT		*rfs4_cb_getch(rfs4_cbinfo_t *);
     71 static	void		rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
     72 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
     73     open_delegation_type4, int *);
     74 
     75 /*
     76  * Convert a universal address to an transport specific
     77  * address using inet_pton.
     78  */
     79 static int
     80 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
     81 {
     82 	int dots = 0, i, j, len, k;
     83 	unsigned char c;
     84 	in_port_t port = 0;
     85 
     86 	len = strlen(ua);
     87 
     88 	for (i = len-1; i >= 0; i--) {
     89 
     90 		if (ua[i] == '.')
     91 			dots++;
     92 
     93 		if (dots == 2) {
     94 
     95 			ua[i] = '\0';
     96 			/*
     97 			 * We use k to remember were to stick '.' back, since
     98 			 * ua was kmem_allocateded from the pool len+1.
     99 			 */
    100 			k = i;
    101 			if (inet_pton(af, ua, ap) == 1) {
    102 
    103 				c = 0;
    104 
    105 				for (j = i+1; j < len; j++) {
    106 					if (ua[j] == '.') {
    107 						port = c << 8;
    108 						c = 0;
    109 					} else if (ua[j] >= '0' &&
    110 					    ua[j] <= '9') {
    111 						c *= 10;
    112 						c += ua[j] - '0';
    113 					} else {
    114 						ua[k] = '.';
    115 						return (EINVAL);
    116 					}
    117 				}
    118 				port += c;
    119 
    120 
    121 				/* reset to network order */
    122 				if (af == AF_INET) {
    123 					*(uint32_t *)ap =
    124 					    htonl(*(uint32_t *)ap);
    125 					*pp = htons(port);
    126 				} else {
    127 					int ix;
    128 					uint16_t *sap;
    129 
    130 					for (sap = ap, ix = 0; ix <
    131 					    sizeof (struct in6_addr) /
    132 					    sizeof (uint16_t); ix++)
    133 						sap[ix] = htons(sap[ix]);
    134 
    135 					*pp = htons(port);
    136 				}
    137 
    138 				ua[k] = '.';
    139 				return (0);
    140 			} else {
    141 				ua[k] = '.';
    142 				return (EINVAL);
    143 			}
    144 		}
    145 	}
    146 
    147 	return (EINVAL);
    148 }
    149 
    150 /*
    151  * Update the delegation policy with the
    152  * value of "new_policy"
    153  */
    154 void
    155 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
    156 {
    157 	rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
    158 	rfs4_deleg_policy = new_policy;
    159 	rw_exit(&rfs4_deleg_policy_lock);
    160 }
    161 
    162 void
    163 rfs4_hold_deleg_policy(void)
    164 {
    165 	rw_enter(&rfs4_deleg_policy_lock, RW_READER);
    166 }
    167 
    168 void
    169 rfs4_rele_deleg_policy(void)
    170 {
    171 	rw_exit(&rfs4_deleg_policy_lock);
    172 }
    173 
    174 
    175 /*
    176  * This free function is to be used when the client struct is being
    177  * released and nothing at all is needed of the callback info any
    178  * longer.
    179  */
    180 void
    181 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
    182 {
    183 	char *addr = cbp->cb_callback.cb_location.r_addr;
    184 	char *netid = cbp->cb_callback.cb_location.r_netid;
    185 
    186 	/* Free old address if any */
    187 
    188 	if (addr)
    189 		kmem_free(addr, strlen(addr) + 1);
    190 	if (netid)
    191 		kmem_free(netid, strlen(netid) + 1);
    192 
    193 	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
    194 	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
    195 
    196 	if (addr)
    197 		kmem_free(addr, strlen(addr) + 1);
    198 	if (netid)
    199 		kmem_free(netid, strlen(netid) + 1);
    200 
    201 	if (cbp->cb_chc_free) {
    202 		rfs4_cb_chflush(cbp);
    203 	}
    204 }
    205 
    206 /*
    207  * The server uses this to check the callback path supplied by the
    208  * client.  The callback connection is marked "in progress" while this
    209  * work is going on and then eventually marked either OK or FAILED.
    210  * This work can be done as part of a separate thread and at the end
    211  * of this the thread will exit or it may be done such that the caller
    212  * will continue with other work.
    213  */
    214 static void
    215 rfs4_do_cb_null(rfs4_client_t *cp)
    216 {
    217 	struct timeval tv;
    218 	CLIENT *ch;
    219 	rfs4_cbstate_t newstate;
    220 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
    221 
    222 	mutex_enter(cbp->cb_lock);
    223 	/* If another thread is doing CB_NULL RPC then return */
    224 	if (cbp->cb_nullcaller == TRUE) {
    225 		mutex_exit(cbp->cb_lock);
    226 		rfs4_client_rele(cp);
    227 		return;
    228 	}
    229 
    230 	/* Mark the cbinfo as having a thread in the NULL callback */
    231 	cbp->cb_nullcaller = TRUE;
    232 
    233 	/*
    234 	 * Are there other threads still using the cbinfo client
    235 	 * handles?  If so, this thread must wait before going and
    236 	 * mucking aroiund with the callback information
    237 	 */
    238 	while (cbp->cb_refcnt != 0)
    239 		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
    240 
    241 	/*
    242 	 * This thread itself may find that new callback info has
    243 	 * arrived and is set up to handle this case and redrive the
    244 	 * call to the client's callback server.
    245 	 */
    246 retry:
    247 	if (cbp->cb_newer.cb_new == TRUE &&
    248 	    cbp->cb_newer.cb_confirmed == TRUE) {
    249 		char *addr = cbp->cb_callback.cb_location.r_addr;
    250 		char *netid = cbp->cb_callback.cb_location.r_netid;
    251 
    252 		/*
    253 		 * Free the old stuff if it exists; may be the first
    254 		 * time through this path
    255 		 */
    256 		if (addr)
    257 			kmem_free(addr, strlen(addr) + 1);
    258 		if (netid)
    259 			kmem_free(netid, strlen(netid) + 1);
    260 
    261 		/* Move over the addr/netid */
    262 		cbp->cb_callback.cb_location.r_addr =
    263 		    cbp->cb_newer.cb_callback.cb_location.r_addr;
    264 		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
    265 		cbp->cb_callback.cb_location.r_netid =
    266 		    cbp->cb_newer.cb_callback.cb_location.r_netid;
    267 		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
    268 
    269 		/* Get the program number */
    270 		cbp->cb_callback.cb_program =
    271 		    cbp->cb_newer.cb_callback.cb_program;
    272 		cbp->cb_newer.cb_callback.cb_program = 0;
    273 
    274 		/* Don't forget the protocol's "cb_ident" field */
    275 		cbp->cb_ident = cbp->cb_newer.cb_ident;
    276 		cbp->cb_newer.cb_ident = 0;
    277 
    278 		/* no longer new */
    279 		cbp->cb_newer.cb_new = FALSE;
    280 		cbp->cb_newer.cb_confirmed = FALSE;
    281 
    282 		/* get rid of the old client handles that may exist */
    283 		rfs4_cb_chflush(cbp);
    284 
    285 		cbp->cb_state = CB_NONE;
    286 		cbp->cb_timefailed = 0; /* reset the clock */
    287 		cbp->cb_notified_of_cb_path_down = TRUE;
    288 	}
    289 
    290 	if (cbp->cb_state != CB_NONE) {
    291 		cv_broadcast(cbp->cb_cv);	/* let the others know */
    292 		cbp->cb_nullcaller = FALSE;
    293 		mutex_exit(cbp->cb_lock);
    294 		rfs4_client_rele(cp);
    295 		return;
    296 	}
    297 
    298 	/* mark rfs4_client_t as CALLBACK NULL in progress */
    299 	cbp->cb_state = CB_INPROG;
    300 	mutex_exit(cbp->cb_lock);
    301 
    302 	/* get/generate a client handle */
    303 	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
    304 		mutex_enter(cbp->cb_lock);
    305 		cbp->cb_state = CB_BAD;
    306 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
    307 		goto retry;
    308 	}
    309 
    310 
    311 	tv.tv_sec = 30;
    312 	tv.tv_usec = 0;
    313 	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
    314 		newstate = CB_BAD;
    315 	} else {
    316 		newstate = CB_OK;
    317 #ifdef	DEBUG
    318 		rfs4_cb_null++;
    319 #endif
    320 	}
    321 
    322 	/* Check to see if the client has specified new callback info */
    323 	mutex_enter(cbp->cb_lock);
    324 	rfs4_cb_freech(cbp, ch, TRUE);
    325 	if (cbp->cb_newer.cb_new == TRUE &&
    326 	    cbp->cb_newer.cb_confirmed == TRUE) {
    327 		goto retry;	/* give the CB_NULL another chance */
    328 	}
    329 
    330 	cbp->cb_state = newstate;
    331 	if (cbp->cb_state == CB_BAD)
    332 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
    333 
    334 	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
    335 	cbp->cb_nullcaller = FALSE;
    336 	mutex_exit(cbp->cb_lock);
    337 
    338 	rfs4_client_rele(cp);
    339 }
    340 
    341 /*
    342  * Given a client struct, inspect the callback info to see if the
    343  * callback path is up and available.
    344  *
    345  * If new callback path is available and no one has set it up then
    346  * try to set it up. If setup is not successful after 5 tries (5 secs)
    347  * then gives up and returns NULL.
    348  *
    349  * If callback path is being initialized, then wait for the CB_NULL RPC
    350  * call to occur.
    351  */
    352 static rfs4_cbinfo_t *
    353 rfs4_cbinfo_hold(rfs4_client_t *cp)
    354 {
    355 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
    356 	int retries = 0;
    357 
    358 	mutex_enter(cbp->cb_lock);
    359 
    360 	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
    361 		/*
    362 		 * Looks like a new callback path may be available and
    363 		 * noone has set it up.
    364 		 */
    365 		mutex_exit(cbp->cb_lock);
    366 		rfs4_dbe_hold(cp->rc_dbe);
    367 		rfs4_do_cb_null(cp); /* caller will release client hold */
    368 
    369 		mutex_enter(cbp->cb_lock);
    370 		/*
    371 		 * If callback path is no longer new, or it's being setup
    372 		 * then stop and wait for it to be done.
    373 		 */
    374 		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
    375 			break;
    376 		mutex_exit(cbp->cb_lock);
    377 
    378 		if (++retries >= rfs4_max_setup_cb_tries)
    379 			return (NULL);
    380 		delay(hz);
    381 		mutex_enter(cbp->cb_lock);
    382 	}
    383 
    384 	/* Is there a thread working on doing the CB_NULL RPC? */
    385 	if (cbp->cb_nullcaller == TRUE)
    386 		cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
    387 
    388 	/* If the callback path is not okay (up and running), just quit */
    389 	if (cbp->cb_state != CB_OK) {
    390 		mutex_exit(cbp->cb_lock);
    391 		return (NULL);
    392 	}
    393 
    394 	/* Let someone know we are using the current callback info */
    395 	cbp->cb_refcnt++;
    396 	mutex_exit(cbp->cb_lock);
    397 	return (cbp);
    398 }
    399 
    400 /*
    401  * The caller is done with the callback info.  It may be that the
    402  * caller's RPC failed and the NFSv4 client has actually provided new
    403  * callback information.  If so, let the caller know so they can
    404  * advantage of this and maybe retry the RPC that originally failed.
    405  */
    406 static int
    407 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
    408 {
    409 	int cb_new = FALSE;
    410 
    411 	mutex_enter(cbp->cb_lock);
    412 
    413 	/* The caller gets a chance to mark the callback info as bad */
    414 	if (newstate != CB_NOCHANGE)
    415 		cbp->cb_state = newstate;
    416 	if (newstate == CB_FAILED) {
    417 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
    418 		cbp->cb_notified_of_cb_path_down = FALSE;
    419 	}
    420 
    421 	cbp->cb_refcnt--;	/* no longer using the information */
    422 
    423 	/*
    424 	 * A thread may be waiting on this one to finish and if so,
    425 	 * let it know that it is okay to do the CB_NULL to the
    426 	 * client's callback server.
    427 	 */
    428 	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
    429 		cv_broadcast(cbp->cb_cv_nullcaller);
    430 
    431 	/*
    432 	 * If this is the last thread to use the callback info and
    433 	 * there is new callback information to try and no thread is
    434 	 * there ready to do the CB_NULL, then return true to teh
    435 	 * caller so they can do the CB_NULL
    436 	 */
    437 	if (cbp->cb_refcnt == 0 &&
    438 	    cbp->cb_nullcaller == FALSE &&
    439 	    cbp->cb_newer.cb_new == TRUE &&
    440 	    cbp->cb_newer.cb_confirmed == TRUE)
    441 		cb_new = TRUE;
    442 
    443 	mutex_exit(cbp->cb_lock);
    444 
    445 	return (cb_new);
    446 }
    447 
    448 /*
    449  * Given the information in the callback info struct, create a client
    450  * handle that can be used by the server for its callback path.
    451  */
    452 static CLIENT *
    453 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
    454 {
    455 	struct knetconfig knc;
    456 	vnode_t *vp;
    457 	struct sockaddr_in addr4;
    458 	struct sockaddr_in6 addr6;
    459 	void *addr, *taddr;
    460 	in_port_t *pp;
    461 	int af;
    462 	char *devnam;
    463 	struct netbuf nb;
    464 	int size;
    465 	CLIENT *ch = NULL;
    466 	int useresvport = 0;
    467 
    468 	mutex_enter(cbp->cb_lock);
    469 
    470 	if (cbp->cb_callback.cb_location.r_netid == NULL ||
    471 	    cbp->cb_callback.cb_location.r_addr == NULL) {
    472 		goto cb_init_out;
    473 	}
    474 
    475 	if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
    476 		knc.knc_semantics = NC_TPI_COTS;
    477 		knc.knc_protofmly = "inet";
    478 		knc.knc_proto = "tcp";
    479 		devnam = "/dev/tcp";
    480 		af = AF_INET;
    481 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
    482 	    == 0) {
    483 		knc.knc_semantics = NC_TPI_CLTS;
    484 		knc.knc_protofmly = "inet";
    485 		knc.knc_proto = "udp";
    486 		devnam = "/dev/udp";
    487 		af = AF_INET;
    488 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
    489 	    == 0) {
    490 		knc.knc_semantics = NC_TPI_COTS;
    491 		knc.knc_protofmly = "inet6";
    492 		knc.knc_proto = "tcp";
    493 		devnam = "/dev/tcp6";
    494 		af = AF_INET6;
    495 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
    496 	    == 0) {
    497 		knc.knc_semantics = NC_TPI_CLTS;
    498 		knc.knc_protofmly = "inet6";
    499 		knc.knc_proto = "udp";
    500 		devnam = "/dev/udp6";
    501 		af = AF_INET6;
    502 	} else {
    503 		goto cb_init_out;
    504 	}
    505 
    506 	if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
    507 
    508 		goto cb_init_out;
    509 	}
    510 
    511 	if (vp->v_type != VCHR) {
    512 		VN_RELE(vp);
    513 		goto cb_init_out;
    514 	}
    515 
    516 	knc.knc_rdev = vp->v_rdev;
    517 
    518 	VN_RELE(vp);
    519 
    520 	if (af == AF_INET) {
    521 		size = sizeof (addr4);
    522 		bzero(&addr4, size);
    523 		addr4.sin_family = (sa_family_t)af;
    524 		addr = &addr4.sin_addr;
    525 		pp = &addr4.sin_port;
    526 		taddr = &addr4;
    527 	} else /* AF_INET6 */ {
    528 		size = sizeof (addr6);
    529 		bzero(&addr6, size);
    530 		addr6.sin6_family = (sa_family_t)af;
    531 		addr = &addr6.sin6_addr;
    532 		pp = &addr6.sin6_port;
    533 		taddr = &addr6;
    534 	}
    535 
    536 	if (uaddr2sockaddr(af,
    537 	    cbp->cb_callback.cb_location.r_addr, addr, pp)) {
    538 
    539 		goto cb_init_out;
    540 	}
    541 
    542 
    543 	nb.maxlen = nb.len = size;
    544 	nb.buf = (char *)taddr;
    545 
    546 	if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
    547 	    NFS_CB, 0, 0, curthread->t_cred, &ch)) {
    548 
    549 		ch = NULL;
    550 	}
    551 
    552 	/* turn off reserved port usage */
    553 	(void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
    554 
    555 cb_init_out:
    556 	mutex_exit(cbp->cb_lock);
    557 	return (ch);
    558 }
    559 
    560 /*
    561  * Iterate over the client handle cache and
    562  * destroy it.
    563  */
    564 static void
    565 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
    566 {
    567 	CLIENT *ch;
    568 
    569 	while (cbp->cb_chc_free) {
    570 		cbp->cb_chc_free--;
    571 		ch = cbp->cb_chc[cbp->cb_chc_free];
    572 		cbp->cb_chc[cbp->cb_chc_free] = NULL;
    573 		if (ch) {
    574 			if (ch->cl_auth)
    575 				auth_destroy(ch->cl_auth);
    576 			clnt_destroy(ch);
    577 		}
    578 	}
    579 }
    580 
    581 /*
    582  * Return a client handle, either from a the small
    583  * rfs4_client_t cache or one that we just created.
    584  */
    585 static CLIENT *
    586 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
    587 {
    588 	CLIENT *cbch = NULL;
    589 	uint32_t zilch = 0;
    590 
    591 	mutex_enter(cbp->cb_lock);
    592 
    593 	if (cbp->cb_chc_free) {
    594 		cbp->cb_chc_free--;
    595 		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
    596 		mutex_exit(cbp->cb_lock);
    597 		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
    598 		return (cbch);
    599 	}
    600 
    601 	mutex_exit(cbp->cb_lock);
    602 
    603 	/* none free so make it now */
    604 	cbch = rfs4_cbch_init(cbp);
    605 
    606 	return (cbch);
    607 }
    608 
    609 /*
    610  * Return the client handle to the small cache or
    611  * destroy it.
    612  */
    613 static void
    614 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
    615 {
    616 	if (lockheld == FALSE)
    617 		mutex_enter(cbp->cb_lock);
    618 
    619 	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
    620 		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
    621 		if (lockheld == FALSE)
    622 			mutex_exit(cbp->cb_lock);
    623 		return;
    624 	}
    625 	if (lockheld == FALSE)
    626 		mutex_exit(cbp->cb_lock);
    627 
    628 	/*
    629 	 * cache maxed out of free entries, obliterate
    630 	 * this client handle, destroy it, throw it away.
    631 	 */
    632 	if (ch->cl_auth)
    633 		auth_destroy(ch->cl_auth);
    634 	clnt_destroy(ch);
    635 }
    636 
    637 /*
    638  * With the supplied callback information - initialize the client
    639  * callback data.  If there is a callback in progress, save the
    640  * callback info so that a thread can pick it up in the future.
    641  */
    642 void
    643 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
    644 {
    645 	char *addr = NULL;
    646 	char *netid = NULL;
    647 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
    648 	size_t len;
    649 
    650 	/* Set the call back for the client */
    651 	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
    652 	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
    653 		len = strlen(cb->cb_location.r_addr) + 1;
    654 		addr = kmem_alloc(len, KM_SLEEP);
    655 		bcopy(cb->cb_location.r_addr, addr, len);
    656 		len = strlen(cb->cb_location.r_netid) + 1;
    657 		netid = kmem_alloc(len, KM_SLEEP);
    658 		bcopy(cb->cb_location.r_netid, netid, len);
    659 	}
    660 	/* ready to save the new information but first free old, if exists */
    661 	mutex_enter(cbp->cb_lock);
    662 
    663 	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
    664 
    665 	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
    666 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
    667 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
    668 	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
    669 
    670 	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
    671 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
    672 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
    673 	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
    674 
    675 	cbp->cb_newer.cb_ident = cb_ident;
    676 
    677 	if (addr && *addr && netid && *netid) {
    678 		cbp->cb_newer.cb_new = TRUE;
    679 		cbp->cb_newer.cb_confirmed = FALSE;
    680 	} else {
    681 		cbp->cb_newer.cb_new = FALSE;
    682 		cbp->cb_newer.cb_confirmed = FALSE;
    683 	}
    684 
    685 	mutex_exit(cbp->cb_lock);
    686 }
    687 
    688 /*
    689  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
    690  * information may have been provided on SETCLIENTID and this call
    691  * marks that information as confirmed and then starts a thread to
    692  * test the callback path.
    693  */
    694 void
    695 rfs4_deleg_cb_check(rfs4_client_t *cp)
    696 {
    697 	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
    698 		return;
    699 
    700 	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
    701 
    702 	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
    703 
    704 	(void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
    705 	    minclsyspri);
    706 }
    707 
    708 static void
    709 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
    710 {
    711 	CB_RECALL4args	*rec_argp;
    712 
    713 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
    714 	if (rec_argp->fh.nfs_fh4_val)
    715 		kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
    716 }
    717 
    718 /* ARGSUSED */
    719 static void
    720 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
    721 {
    722 	CB_GETATTR4args *argp;
    723 
    724 	argp = &argop->nfs_cb_argop4_u.opcbgetattr;
    725 	if (argp->fh.nfs_fh4_val)
    726 		kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
    727 }
    728 
    729 static void
    730 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
    731 {
    732 	int i, arglen;
    733 	nfs_cb_argop4 *argop;
    734 
    735 	/*
    736 	 * First free any special args alloc'd for specific ops.
    737 	 */
    738 	arglen = args->array_len;
    739 	argop = args->array;
    740 	for (i = 0; i < arglen; i++, argop++) {
    741 
    742 		switch (argop->argop) {
    743 		case OP_CB_RECALL:
    744 			rfs4args_cb_recall_free(argop);
    745 			break;
    746 
    747 		case OP_CB_GETATTR:
    748 			rfs4args_cb_getattr_free(argop);
    749 			break;
    750 
    751 		default:
    752 			return;
    753 		}
    754 	}
    755 
    756 	if (args->tag.utf8string_len > 0)
    757 		UTF8STRING_FREE(args->tag)
    758 
    759 	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
    760 	if (resp)
    761 		(void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
    762 }
    763 
    764 /*
    765  * General callback routine for the server to the client.
    766  */
    767 static enum clnt_stat
    768 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
    769     CB_COMPOUND4res *res, struct timeval timeout)
    770 {
    771 	rfs4_cbinfo_t *cbp;
    772 	CLIENT *ch;
    773 	/* start with this in case cb_getch() fails */
    774 	enum clnt_stat	stat = RPC_FAILED;
    775 
    776 	res->tag.utf8string_val = NULL;
    777 	res->array = NULL;
    778 
    779 retry:
    780 	cbp = rfs4_cbinfo_hold(cp);
    781 	if (cbp == NULL)
    782 		return (stat);
    783 
    784 	/* get a client handle */
    785 	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
    786 		/*
    787 		 * reset the cb_ident since it may have changed in
    788 		 * rfs4_cbinfo_hold()
    789 		 */
    790 		args->callback_ident = cbp->cb_ident;
    791 
    792 		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
    793 		    (caddr_t)args, xdr_CB_COMPOUND4res,
    794 		    (caddr_t)res, timeout);
    795 
    796 		/* free client handle */
    797 		rfs4_cb_freech(cbp, ch, FALSE);
    798 	}
    799 
    800 	/*
    801 	 * If the rele says that there may be new callback info then
    802 	 * retry this sequence and it may succeed as a result of the
    803 	 * new callback path
    804 	 */
    805 	if (rfs4_cbinfo_rele(cbp,
    806 	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
    807 		goto retry;
    808 
    809 	return (stat);
    810 }
    811 
    812 /*
    813  * Used by the NFSv4 server to get attributes for a file while
    814  * handling the case where a file has been write delegated.  For the
    815  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
    816  * not undertaken.  This call site is maintained in case the server is
    817  * updated in the future to handle write delegation space guarantees.
    818  */
    819 nfsstat4
    820 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
    821 {
    822 
    823 	int error;
    824 
    825 	error = VOP_GETATTR(vp, vap, flag, cr, NULL);
    826 	return (puterrno4(error));
    827 }
    828 
    829 /*
    830  * This is used everywhere in the v2/v3 server to allow the
    831  * integration of all NFS versions and the support of delegation.  For
    832  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
    833  * in the future to provide space guarantees for write delegations
    834  * then this call site should be expanded to interact with the client.
    835  */
    836 int
    837 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
    838 {
    839 	return (VOP_GETATTR(vp, vap, flag, cr, NULL));
    840 }
    841 
    842 /*
    843  * Place the actual cb_recall otw call to client.
    844  */
    845 static void
    846 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
    847 {
    848 	CB_COMPOUND4args	cb4_args;
    849 	CB_COMPOUND4res		cb4_res;
    850 	CB_RECALL4args		*rec_argp;
    851 	CB_RECALL4res		*rec_resp;
    852 	nfs_cb_argop4		*argop;
    853 	int			numops;
    854 	int			argoplist_size;
    855 	struct timeval		timeout;
    856 	nfs_fh4			*fhp;
    857 	enum clnt_stat		call_stat;
    858 
    859 	/*
    860 	 * set up the compound args
    861 	 */
    862 	numops = 1;	/* CB_RECALL only */
    863 
    864 	argoplist_size = numops * sizeof (nfs_cb_argop4);
    865 	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
    866 	argop->argop = OP_CB_RECALL;
    867 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
    868 
    869 	(void) str_to_utf8("cb_recall", &cb4_args.tag);
    870 	cb4_args.minorversion = CB4_MINORVERSION;
    871 	/* cb4_args.callback_ident is set in rfs4_do_callback() */
    872 	cb4_args.array_len = numops;
    873 	cb4_args.array = argop;
    874 
    875 	/*
    876 	 * fill in the args struct
    877 	 */
    878 	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
    879 	rec_argp->truncate = trunc;
    880 
    881 	fhp = &dsp->rds_finfo->rf_filehandle;
    882 	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
    883 	    fhp->nfs_fh4_len, KM_SLEEP);
    884 	nfs_fh4_copy(fhp, &rec_argp->fh);
    885 
    886 	/* Keep track of when we did this for observability */
    887 	dsp->rds_time_recalled = gethrestime_sec();
    888 
    889 	/*
    890 	 * Set up the timeout for the callback and make the actual call.
    891 	 * Timeout will be 80% of the lease period for this server.
    892 	 */
    893 	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
    894 	timeout.tv_usec = 0;
    895 
    896 	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
    897 	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
    898 
    899 	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
    900 	    timeout);
    901 
    902 	rec_resp = (cb4_res.array_len == 0) ? NULL :
    903 	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
    904 
    905 	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
    906 	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
    907 
    908 	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
    909 		rfs4_return_deleg(dsp, TRUE);
    910 	}
    911 
    912 	rfs4freeargres(&cb4_args, &cb4_res);
    913 }
    914 
    915 struct recall_arg {
    916 	rfs4_deleg_state_t *dsp;
    917 	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
    918 	bool_t trunc;
    919 };
    920 
    921 static void
    922 do_recall(struct recall_arg *arg)
    923 {
    924 	rfs4_deleg_state_t *dsp = arg->dsp;
    925 	rfs4_file_t *fp = dsp->rds_finfo;
    926 	callb_cpr_t cpr_info;
    927 	kmutex_t cpr_lock;
    928 
    929 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
    930 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
    931 
    932 	/*
    933 	 * It is possible that before this thread starts
    934 	 * the client has send us a return_delegation, and
    935 	 * if that is the case we do not need to send the
    936 	 * recall callback.
    937 	 */
    938 	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
    939 		DTRACE_PROBE3(nfss__i__recall,
    940 		    struct recall_arg *, arg,
    941 		    struct rfs4_deleg_state_t *, dsp,
    942 		    struct rfs4_file_t *, fp);
    943 
    944 		if (arg->recall)
    945 			(void) (*arg->recall)(dsp, arg->trunc);
    946 	}
    947 
    948 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
    949 	/*
    950 	 * Recall count may go negative if the parent thread that is
    951 	 * creating the individual callback threads does not modify
    952 	 * the recall_count field before the callback thread actually
    953 	 * gets a response from the CB_RECALL
    954 	 */
    955 	fp->rf_dinfo.rd_recall_count--;
    956 	if (fp->rf_dinfo.rd_recall_count == 0)
    957 		cv_signal(fp->rf_dinfo.rd_recall_cv);
    958 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
    959 
    960 	mutex_enter(&cpr_lock);
    961 	CALLB_CPR_EXIT(&cpr_info);
    962 	mutex_destroy(&cpr_lock);
    963 
    964 	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
    965 
    966 	kmem_free(arg, sizeof (struct recall_arg));
    967 }
    968 
    969 struct master_recall_args {
    970     rfs4_file_t *fp;
    971     void (*recall)(rfs4_deleg_state_t *, bool_t);
    972     bool_t trunc;
    973 };
    974 
    975 static void
    976 do_recall_file(struct master_recall_args *map)
    977 {
    978 	rfs4_file_t *fp = map->fp;
    979 	rfs4_deleg_state_t *dsp;
    980 	struct recall_arg *arg;
    981 	callb_cpr_t cpr_info;
    982 	kmutex_t cpr_lock;
    983 	int32_t recall_count;
    984 
    985 	rfs4_dbe_lock(fp->rf_dbe);
    986 
    987 	/* Recall already in progress ? */
    988 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
    989 	if (fp->rf_dinfo.rd_recall_count != 0) {
    990 		mutex_exit(fp->rf_dinfo.rd_recall_lock);
    991 		rfs4_dbe_rele_nolock(fp->rf_dbe);
    992 		rfs4_dbe_unlock(fp->rf_dbe);
    993 		kmem_free(map, sizeof (struct master_recall_args));
    994 		return;
    995 	}
    996 
    997 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
    998 
    999 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
   1000 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,	"v4RecallFile");
   1001 
   1002 	recall_count = 0;
   1003 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
   1004 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
   1005 
   1006 		rfs4_dbe_lock(dsp->rds_dbe);
   1007 		/*
   1008 		 * if this delegation state
   1009 		 * is being reaped skip it
   1010 		 */
   1011 		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
   1012 			rfs4_dbe_unlock(dsp->rds_dbe);
   1013 			continue;
   1014 		}
   1015 
   1016 		/* hold for receiving thread */
   1017 		rfs4_dbe_hold(dsp->rds_dbe);
   1018 		rfs4_dbe_unlock(dsp->rds_dbe);
   1019 
   1020 		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
   1021 		arg->recall = map->recall;
   1022 		arg->trunc = map->trunc;
   1023 		arg->dsp = dsp;
   1024 
   1025 		recall_count++;
   1026 
   1027 		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
   1028 		    minclsyspri);
   1029 	}
   1030 
   1031 	rfs4_dbe_unlock(fp->rf_dbe);
   1032 
   1033 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
   1034 	/*
   1035 	 * Recall count may go negative if the parent thread that is
   1036 	 * creating the individual callback threads does not modify
   1037 	 * the recall_count field before the callback thread actually
   1038 	 * gets a response from the CB_RECALL
   1039 	 */
   1040 	fp->rf_dinfo.rd_recall_count += recall_count;
   1041 	while (fp->rf_dinfo.rd_recall_count)
   1042 		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
   1043 
   1044 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
   1045 
   1046 	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
   1047 	rfs4_file_rele(fp);
   1048 	kmem_free(map, sizeof (struct master_recall_args));
   1049 	mutex_enter(&cpr_lock);
   1050 	CALLB_CPR_EXIT(&cpr_info);
   1051 	mutex_destroy(&cpr_lock);
   1052 }
   1053 
   1054 static void
   1055 rfs4_recall_file(rfs4_file_t *fp,
   1056     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
   1057     bool_t trunc, rfs4_client_t *cp)
   1058 {
   1059 	struct master_recall_args *args;
   1060 
   1061 	rfs4_dbe_lock(fp->rf_dbe);
   1062 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
   1063 		rfs4_dbe_unlock(fp->rf_dbe);
   1064 		return;
   1065 	}
   1066 	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */
   1067 
   1068 	/*
   1069 	 * Mark the time we started the recall processing.
   1070 	 * If it has been previously recalled, do not reset the
   1071 	 * timer since this is used for the revocation decision.
   1072 	 */
   1073 	if (fp->rf_dinfo.rd_time_recalled == 0)
   1074 		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
   1075 	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
   1076 	/* Client causing recall not always available */
   1077 	if (cp)
   1078 		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
   1079 
   1080 	rfs4_dbe_unlock(fp->rf_dbe);
   1081 
   1082 	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
   1083 	args->fp = fp;
   1084 	args->recall = recall;
   1085 	args->trunc = trunc;
   1086 
   1087 	(void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
   1088 	    minclsyspri);
   1089 }
   1090 
   1091 void
   1092 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
   1093 {
   1094 	time_t elapsed1, elapsed2;
   1095 
   1096 	if (fp->rf_dinfo.rd_time_recalled != 0) {
   1097 		elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
   1098 		elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
   1099 		/* First check to see if a revocation should occur */
   1100 		if (elapsed1 > rfs4_lease_time &&
   1101 		    elapsed2 > rfs4_lease_time) {
   1102 			rfs4_revoke_file(fp);
   1103 			return;
   1104 		}
   1105 		/*
   1106 		 * Next check to see if a recall should be done again
   1107 		 * so quickly.
   1108 		 */
   1109 		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
   1110 			return;
   1111 	}
   1112 	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
   1113 }
   1114 
   1115 /*
   1116  * rfs4_check_recall is called from rfs4_do_open to determine if the current
   1117  * open conflicts with the delegation.
   1118  * Return true if we need recall otherwise false.
   1119  * Assumes entry locks for sp and sp->rs_finfo are held.
   1120  */
   1121 bool_t
   1122 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
   1123 {
   1124 	open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
   1125 
   1126 	switch (dtype) {
   1127 	case OPEN_DELEGATE_NONE:
   1128 		/* Not currently delegated so there is nothing to do */
   1129 		return (FALSE);
   1130 	case OPEN_DELEGATE_READ:
   1131 		/*
   1132 		 * If the access is only asking for READ then there is
   1133 		 * no conflict and nothing to do.  If it is asking
   1134 		 * for write, then there will be conflict and the read
   1135 		 * delegation should be recalled.
   1136 		 */
   1137 		if (access == OPEN4_SHARE_ACCESS_READ)
   1138 			return (FALSE);
   1139 		else
   1140 			return (TRUE);
   1141 	case OPEN_DELEGATE_WRITE:
   1142 		/* Check to see if this client has the delegation */
   1143 		return (rfs4_is_deleg(sp));
   1144 	}
   1145 
   1146 	return (FALSE);
   1147 }
   1148 
   1149 /*
   1150  * Return the "best" allowable delegation available given the current
   1151  * delegation type and the desired access and deny modes on the file.
   1152  * At the point that this routine is called we know that the access and
   1153  * deny modes are consistent with the file modes.
   1154  */
   1155 static open_delegation_type4
   1156 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
   1157 {
   1158 	open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
   1159 	uint32_t access = sp->rs_share_access;
   1160 	uint32_t deny = sp->rs_share_deny;
   1161 	int readcnt = 0;
   1162 	int writecnt = 0;
   1163 
   1164 	switch (dtype) {
   1165 	case OPEN_DELEGATE_NONE:
   1166 		/*
   1167 		 * Determine if more than just this OPEN have the file
   1168 		 * open and if so, no delegation may be provided to
   1169 		 * the client.
   1170 		 */
   1171 		if (access & OPEN4_SHARE_ACCESS_WRITE)
   1172 			writecnt++;
   1173 		if (access & OPEN4_SHARE_ACCESS_READ)
   1174 			readcnt++;
   1175 
   1176 		if (fp->rf_access_read > readcnt ||
   1177 		    fp->rf_access_write > writecnt)
   1178 			return (OPEN_DELEGATE_NONE);
   1179 
   1180 		/*
   1181 		 * If the client is going to write, or if the client
   1182 		 * has exclusive access, return a write delegation.
   1183 		 */
   1184 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
   1185 		    (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
   1186 			return (OPEN_DELEGATE_WRITE);
   1187 		/*
   1188 		 * If we don't want to write or we've haven't denied read
   1189 		 * access to others, return a read delegation.
   1190 		 */
   1191 		if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
   1192 		    (deny & ~OPEN4_SHARE_DENY_READ))
   1193 			return (OPEN_DELEGATE_READ);
   1194 
   1195 		/* Shouldn't get here */
   1196 		return (OPEN_DELEGATE_NONE);
   1197 
   1198 	case OPEN_DELEGATE_READ:
   1199 		/*
   1200 		 * If the file is delegated for read but we wan't to
   1201 		 * write or deny others to read then we can't delegate
   1202 		 * the file. We shouldn't get here since the delegation should
   1203 		 * have been recalled already.
   1204 		 */
   1205 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
   1206 		    (deny & OPEN4_SHARE_DENY_READ))
   1207 			return (OPEN_DELEGATE_NONE);
   1208 		return (OPEN_DELEGATE_READ);
   1209 
   1210 	case OPEN_DELEGATE_WRITE:
   1211 		return (OPEN_DELEGATE_WRITE);
   1212 	}
   1213 
   1214 	/* Shouldn't get here */
   1215 	return (OPEN_DELEGATE_NONE);
   1216 }
   1217 
   1218 /*
   1219  * Given the desired delegation type and the "history" of the file
   1220  * determine the actual delegation type to return.
   1221  */
   1222 static open_delegation_type4
   1223 rfs4_delegation_policy(open_delegation_type4 dtype,
   1224     rfs4_dinfo_t *dinfo, clientid4 cid)
   1225 {
   1226 	time_t elapsed;
   1227 
   1228 	if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
   1229 		return (OPEN_DELEGATE_NONE);
   1230 
   1231 	/*
   1232 	 * Has this file/delegation ever been recalled?  If not then
   1233 	 * no further checks for a delegation race need to be done.
   1234 	 * However if a recall has occurred, then check to see if a
   1235 	 * client has caused its own delegation recall to occur.  If
   1236 	 * not, then has a delegation for this file been returned
   1237 	 * recently?  If so, then do not assign a new delegation to
   1238 	 * avoid a "delegation race" between the original client and
   1239 	 * the new/conflicting client.
   1240 	 */
   1241 	if (dinfo->rd_ever_recalled == TRUE) {
   1242 		if (dinfo->rd_conflicted_client != cid) {
   1243 			elapsed = gethrestime_sec() - dinfo->rd_time_returned;
   1244 			if (elapsed < rfs4_lease_time)
   1245 				return (OPEN_DELEGATE_NONE);
   1246 		}
   1247 	}
   1248 
   1249 	/* Limit the number of read grants */
   1250 	if (dtype == OPEN_DELEGATE_READ &&
   1251 	    dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
   1252 		return (OPEN_DELEGATE_NONE);
   1253 
   1254 	/*
   1255 	 * Should consider limiting total number of read/write
   1256 	 * delegations the server will permit.
   1257 	 */
   1258 
   1259 	return (dtype);
   1260 }
   1261 
   1262 /*
   1263  * Try and grant a delegation for an open give the state. The routine
   1264  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
   1265  *
   1266  * The state and associate file entry must be locked
   1267  */
   1268 rfs4_deleg_state_t *
   1269 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
   1270 {
   1271 	rfs4_file_t *fp = sp->rs_finfo;
   1272 	open_delegation_type4 dtype;
   1273 	int no_delegation;
   1274 
   1275 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
   1276 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
   1277 
   1278 	/* Is the server even providing delegations? */
   1279 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
   1280 		return (NULL);
   1281 
   1282 	/* Check to see if delegations have been temporarily disabled */
   1283 	mutex_enter(&rfs4_deleg_lock);
   1284 	no_delegation = rfs4_deleg_disabled;
   1285 	mutex_exit(&rfs4_deleg_lock);
   1286 
   1287 	if (no_delegation)
   1288 		return (NULL);
   1289 
   1290 	/* Don't grant a delegation if a deletion is impending. */
   1291 	if (fp->rf_dinfo.rd_hold_grant > 0) {
   1292 		return (NULL);
   1293 	}
   1294 
   1295 	/*
   1296 	 * Don't grant a delegation if there are any lock manager
   1297 	 * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
   1298 	 * if there are only read locks we should be able to grant a
   1299 	 * read-only delegation), but it's good enough for now.
   1300 	 *
   1301 	 * MT safety: the lock manager checks for conflicting delegations
   1302 	 * before processing a lock request.  That check will block until
   1303 	 * we are done here.  So if the lock manager acquires a lock after
   1304 	 * we decide to grant the delegation, the delegation will get
   1305 	 * immediately recalled (if there's a conflict), so we're safe.
   1306 	 */
   1307 	if (lm_vp_active(fp->rf_vp)) {
   1308 		return (NULL);
   1309 	}
   1310 
   1311 	/*
   1312 	 * Based on the type of delegation request passed in, take the
   1313 	 * appropriate action (DELEG_NONE is handled above)
   1314 	 */
   1315 	switch (dreq) {
   1316 
   1317 	case DELEG_READ:
   1318 	case DELEG_WRITE:
   1319 		/*
   1320 		 * The server "must" grant the delegation in this case.
   1321 		 * Client is using open previous
   1322 		 */
   1323 		dtype = (open_delegation_type4)dreq;
   1324 		*recall = 1;
   1325 		break;
   1326 	case DELEG_ANY:
   1327 		/*
   1328 		 * If a valid callback path does not exist, no delegation may
   1329 		 * be granted.
   1330 		 */
   1331 		if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
   1332 			return (NULL);
   1333 
   1334 		/*
   1335 		 * If the original operation which caused time_rm_delayed
   1336 		 * to be set hasn't been retried and completed for one
   1337 		 * full lease period, clear it and allow delegations to
   1338 		 * get granted again.
   1339 		 */
   1340 		if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
   1341 		    gethrestime_sec() >
   1342 		    fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
   1343 			fp->rf_dinfo.rd_time_rm_delayed = 0;
   1344 
   1345 		/*
   1346 		 * If we are waiting for a delegation to be returned then
   1347 		 * don't delegate this file. We do this for correctness as
   1348 		 * well as if the file is being recalled we would likely
   1349 		 * recall this file again.
   1350 		 */
   1351 
   1352 		if (fp->rf_dinfo.rd_time_recalled != 0 ||
   1353 		    fp->rf_dinfo.rd_time_rm_delayed != 0)
   1354 			return (NULL);
   1355 
   1356 		/* Get the "best" delegation candidate */
   1357 		dtype = rfs4_check_delegation(sp, fp);
   1358 
   1359 		if (dtype == OPEN_DELEGATE_NONE)
   1360 			return (NULL);
   1361 
   1362 		/*
   1363 		 * Based on policy and the history of the file get the
   1364 		 * actual delegation.
   1365 		 */
   1366 		dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
   1367 		    sp->rs_owner->ro_client->rc_clientid);
   1368 
   1369 		if (dtype == OPEN_DELEGATE_NONE)
   1370 			return (NULL);
   1371 		break;
   1372 	default:
   1373 		return (NULL);
   1374 	}
   1375 
   1376 	/* set the delegation for the state */
   1377 	return (rfs4_deleg_state(sp, dtype, recall));
   1378 }
   1379 
   1380 void
   1381 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
   1382     nfsace4 *ace,  int recall)
   1383 {
   1384 	open_write_delegation4 *wp;
   1385 	open_read_delegation4 *rp;
   1386 	nfs_space_limit4 *spl;
   1387 	nfsace4 nace;
   1388 
   1389 	/*
   1390 	 * We need to allocate a new copy of the who string.
   1391 	 * this string will be freed by the rfs4_op_open dis_resfree
   1392 	 * routine. We need to do this allocation since replays will
   1393 	 * be allocated and rfs4_compound can't tell the difference from
   1394 	 * a replay and an inital open. N.B. if an ace is passed in, it
   1395 	 * the caller's responsibility to free it.
   1396 	 */
   1397 
   1398 	if (ace == NULL) {
   1399 		/*
   1400 		 * Default is to deny all access, the client will have
   1401 		 * to contact the server.  XXX Do we want to actually
   1402 		 * set a deny for every one, or do we simply want to
   1403 		 * construct an entity that will match no one?
   1404 		 */
   1405 		nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
   1406 		nace.flag = 0;
   1407 		nace.access_mask = ACE4_VALID_MASK_BITS;
   1408 		(void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
   1409 	} else {
   1410 		nace.type = ace->type;
   1411 		nace.flag = ace->flag;
   1412 		nace.access_mask = ace->access_mask;
   1413 		(void) utf8_copy(&ace->who, &nace.who);
   1414 	}
   1415 
   1416 	dp->delegation_type = dsp->rds_dtype;
   1417 
   1418 	switch (dsp->rds_dtype) {
   1419 	case OPEN_DELEGATE_NONE:
   1420 		break;
   1421 	case OPEN_DELEGATE_READ:
   1422 		rp = &dp->open_delegation4_u.read;
   1423 		rp->stateid = dsp->rds_delegid.stateid;
   1424 		rp->recall = (bool_t)recall;
   1425 		rp->permissions = nace;
   1426 		break;
   1427 	case OPEN_DELEGATE_WRITE:
   1428 		wp = &dp->open_delegation4_u.write;
   1429 		wp->stateid = dsp->rds_delegid.stateid;
   1430 		wp->recall = (bool_t)recall;
   1431 		spl = &wp->space_limit;
   1432 		spl->limitby = NFS_LIMIT_SIZE;
   1433 		spl->nfs_space_limit4_u.filesize = 0;
   1434 		wp->permissions = nace;
   1435 		break;
   1436 	}
   1437 }
   1438 
   1439 /*
   1440  * Check if the file is delegated via the provided file struct.
   1441  * Return TRUE if it is delegated.  This is intended for use by
   1442  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
   1443  *
   1444  * Note that if the file is found to have a delegation, it is
   1445  * recalled, unless the clientid of the caller matches the clientid of the
   1446  * delegation. If the caller has specified, there is a slight delay
   1447  * inserted in the hopes that the delegation will be returned quickly.
   1448  */
   1449 bool_t
   1450 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
   1451     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
   1452 {
   1453 	rfs4_deleg_state_t *dsp;
   1454 
   1455 	/* Is delegation enabled? */
   1456 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
   1457 		return (FALSE);
   1458 
   1459 	/* do we have a delegation on this file? */
   1460 	rfs4_dbe_lock(fp->rf_dbe);
   1461 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
   1462 		if (is_rm)
   1463 			fp->rf_dinfo.rd_hold_grant++;
   1464 		rfs4_dbe_unlock(fp->rf_dbe);
   1465 		return (FALSE);
   1466 	}
   1467 	/*
   1468 	 * do we have a write delegation on this file or are we
   1469 	 * requesting write access to a file with any type of existing
   1470 	 * delegation?
   1471 	 */
   1472 	if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
   1473 		if (cp != NULL) {
   1474 			dsp = list_head(&fp->rf_delegstatelist);
   1475 			if (dsp == NULL) {
   1476 				rfs4_dbe_unlock(fp->rf_dbe);
   1477 				return (FALSE);
   1478 			}
   1479 			/*
   1480 			 * Does the requestor already own the delegation?
   1481 			 */
   1482 			if (dsp->rds_client->rc_clientid == *(cp)) {
   1483 				rfs4_dbe_unlock(fp->rf_dbe);
   1484 				return (FALSE);
   1485 			}
   1486 		}
   1487 
   1488 		rfs4_dbe_unlock(fp->rf_dbe);
   1489 		rfs4_recall_deleg(fp, trunc, NULL);
   1490 
   1491 		if (!do_delay) {
   1492 			rfs4_dbe_lock(fp->rf_dbe);
   1493 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
   1494 			rfs4_dbe_unlock(fp->rf_dbe);
   1495 			return (TRUE);
   1496 		}
   1497 
   1498 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
   1499 
   1500 		rfs4_dbe_lock(fp->rf_dbe);
   1501 		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
   1502 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
   1503 			rfs4_dbe_unlock(fp->rf_dbe);
   1504 			return (TRUE);
   1505 		}
   1506 	}
   1507 	if (is_rm)
   1508 		fp->rf_dinfo.rd_hold_grant++;
   1509 	rfs4_dbe_unlock(fp->rf_dbe);
   1510 	return (FALSE);
   1511 }
   1512 
   1513 /*
   1514  * Check if the file is delegated in the case of a v2 or v3 access.
   1515  * Return TRUE if it is delegated which in turn means that v2 should
   1516  * drop the request and in the case of v3 JUKEBOX should be returned.
   1517  */
   1518 bool_t
   1519 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
   1520 {
   1521 	rfs4_file_t *fp;
   1522 	bool_t create = FALSE;
   1523 	bool_t rc = FALSE;
   1524 
   1525 	rfs4_hold_deleg_policy();
   1526 
   1527 	/* Is delegation enabled? */
   1528 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
   1529 		fp = rfs4_findfile(vp, NULL, &create);
   1530 		if (fp != NULL) {
   1531 			if (rfs4_check_delegated_byfp(mode, fp, trunc,
   1532 			    TRUE, FALSE, NULL)) {
   1533 				rc = TRUE;
   1534 			}
   1535 			rfs4_file_rele(fp);
   1536 		}
   1537 	}
   1538 	rfs4_rele_deleg_policy();
   1539 	return (rc);
   1540 }
   1541 
   1542 /*
   1543  * Release a hold on the hold_grant counter which
   1544  * prevents delegation from being granted while a remove
   1545  * or a rename is in progress.
   1546  */
   1547 void
   1548 rfs4_clear_dont_grant(rfs4_file_t *fp)
   1549 {
   1550 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
   1551 		return;
   1552 	rfs4_dbe_lock(fp->rf_dbe);
   1553 	ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
   1554 	fp->rf_dinfo.rd_hold_grant--;
   1555 	fp->rf_dinfo.rd_time_rm_delayed = 0;
   1556 	rfs4_dbe_unlock(fp->rf_dbe);
   1557 }
   1558 
   1559 /*
   1560  * State support for delegation.
   1561  * Set the state delegation type for this state;
   1562  * This routine is called from open via rfs4_grant_delegation and the entry
   1563  * locks on sp and sp->rs_finfo are assumed.
   1564  */
   1565 static rfs4_deleg_state_t *
   1566 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
   1567 {
   1568 	rfs4_file_t *fp = sp->rs_finfo;
   1569 	bool_t create = TRUE;
   1570 	rfs4_deleg_state_t *dsp;
   1571 	vnode_t *vp;
   1572 	int open_prev = *recall;
   1573 	int ret;
   1574 	int fflags = 0;
   1575 
   1576 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
   1577 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
   1578 
   1579 	/* Shouldn't happen */
   1580 	if (fp->rf_dinfo.rd_recall_count != 0 ||
   1581 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
   1582 	    dtype != OPEN_DELEGATE_READ)) {
   1583 		return (NULL);
   1584 	}
   1585 
   1586 	/* Unlock to avoid deadlock */
   1587 	rfs4_dbe_unlock(fp->rf_dbe);
   1588 	rfs4_dbe_unlock(sp->rs_dbe);
   1589 
   1590 	dsp = rfs4_finddeleg(sp, &create);
   1591 
   1592 	rfs4_dbe_lock(sp->rs_dbe);
   1593 	rfs4_dbe_lock(fp->rf_dbe);
   1594 
   1595 	if (dsp == NULL)
   1596 		return (NULL);
   1597 
   1598 	/*
   1599 	 * It is possible that since we dropped the lock
   1600 	 * in order to call finddeleg, the rfs4_file_t
   1601 	 * was marked such that we should not grant a
   1602 	 * delegation, if so bail out.
   1603 	 */
   1604 	if (fp->rf_dinfo.rd_hold_grant > 0) {
   1605 		rfs4_deleg_state_rele(dsp);
   1606 		return (NULL);
   1607 	}
   1608 
   1609 	if (create == FALSE) {
   1610 		if (sp->rs_owner->ro_client == dsp->rds_client &&
   1611 		    dsp->rds_dtype == dtype) {
   1612 			return (dsp);
   1613 		} else {
   1614 			rfs4_deleg_state_rele(dsp);
   1615 			return (NULL);
   1616 		}
   1617 	}
   1618 
   1619 	/*
   1620 	 * Check that this file has not been delegated to another
   1621 	 * client
   1622 	 */
   1623 	if (fp->rf_dinfo.rd_recall_count != 0 ||
   1624 	    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
   1625 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
   1626 	    dtype != OPEN_DELEGATE_READ)) {
   1627 		rfs4_deleg_state_rele(dsp);
   1628 		return (NULL);
   1629 	}
   1630 
   1631 	vp = fp->rf_vp;
   1632 	/* vnevent_support returns 0 if file system supports vnevents */
   1633 	if (vnevent_support(vp, NULL)) {
   1634 		rfs4_deleg_state_rele(dsp);
   1635 		return (NULL);
   1636 	}
   1637 
   1638 	/* Calculate the fflags for this OPEN. */
   1639 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
   1640 		fflags |= FREAD;
   1641 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
   1642 		fflags |= FWRITE;
   1643 
   1644 	*recall = 0;
   1645 	/*
   1646 	 * Before granting a delegation we need to know if anyone else has
   1647 	 * opened the file in a conflicting mode.  However, first we need to
   1648 	 * know how we opened the file to check the counts properly.
   1649 	 */
   1650 	if (dtype == OPEN_DELEGATE_READ) {
   1651 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
   1652 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
   1653 		    vn_is_mapped(vp, V_WRITE)) {
   1654 			if (open_prev) {
   1655 				*recall = 1;
   1656 			} else {
   1657 				rfs4_deleg_state_rele(dsp);
   1658 				return (NULL);
   1659 			}
   1660 		}
   1661 		ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
   1662 		    rfs4_mon_hold, rfs4_mon_rele);
   1663 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
   1664 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
   1665 		    vn_is_mapped(vp, V_WRITE)) {
   1666 			if (open_prev) {
   1667 				*recall = 1;
   1668 			} else {
   1669 				(void) fem_uninstall(vp, deleg_rdops,
   1670 				    (void *)fp);
   1671 				rfs4_deleg_state_rele(dsp);
   1672 				return (NULL);
   1673 			}
   1674 		}
   1675 		/*
   1676 		 * Because a client can hold onto a delegation after the
   1677 		 * file has been closed, we need to keep track of the
   1678 		 * access to this file.  Otherwise the CIFS server would
   1679 		 * not know about the client accessing the file and could
   1680 		 * inappropriately grant an OPLOCK.
   1681 		 * fem_install() returns EBUSY when asked to install a
   1682 		 * OPUNIQ monitor more than once.  Therefore, check the
   1683 		 * return code because we only want this done once.
   1684 		 */
   1685 		if (ret == 0)
   1686 			vn_open_upgrade(vp, FREAD);
   1687 	} else { /* WRITE */
   1688 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
   1689 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
   1690 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
   1691 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
   1692 		    vn_is_mapped(vp, V_RDORWR)) {
   1693 			if (open_prev) {
   1694 				*recall = 1;
   1695 			} else {
   1696 				rfs4_deleg_state_rele(dsp);
   1697 				return (NULL);
   1698 			}
   1699 		}
   1700 		ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
   1701 		    rfs4_mon_hold, rfs4_mon_rele);
   1702 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
   1703 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
   1704 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
   1705 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
   1706 		    vn_is_mapped(vp, V_RDORWR)) {
   1707 			if (open_prev) {
   1708 				*recall = 1;
   1709 			} else {
   1710 				(void) fem_uninstall(vp, deleg_wrops,
   1711 				    (void *)fp);
   1712 				rfs4_deleg_state_rele(dsp);
   1713 				return (NULL);
   1714 			}
   1715 		}
   1716 		/*
   1717 		 * Because a client can hold onto a delegation after the
   1718 		 * file has been closed, we need to keep track of the
   1719 		 * access to this file.  Otherwise the CIFS server would
   1720 		 * not know about the client accessing the file and could
   1721 		 * inappropriately grant an OPLOCK.
   1722 		 * fem_install() returns EBUSY when asked to install a
   1723 		 * OPUNIQ monitor more than once.  Therefore, check the
   1724 		 * return code because we only want this done once.
   1725 		 */
   1726 		if (ret == 0)
   1727 			vn_open_upgrade(vp, FREAD|FWRITE);
   1728 	}
   1729 	/* Place on delegation list for file */
   1730 	ASSERT(!list_link_active(&dsp->rds_node));
   1731 	list_insert_tail(&fp->rf_delegstatelist, dsp);
   1732 
   1733 	dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
   1734 
   1735 	/* Update delegation stats for this file */
   1736 	fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
   1737 
   1738 	/* reset since this is a new delegation */
   1739 	fp->rf_dinfo.rd_conflicted_client = 0;
   1740 	fp->rf_dinfo.rd_ever_recalled = FALSE;
   1741 
   1742 	if (dtype == OPEN_DELEGATE_READ)
   1743 		fp->rf_dinfo.rd_rdgrants++;
   1744 	else
   1745 		fp->rf_dinfo.rd_wrgrants++;
   1746 
   1747 	return (dsp);
   1748 }
   1749 
   1750 /*
   1751  * State routine for the server when a delegation is returned.
   1752  */
   1753 void
   1754 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
   1755 {
   1756 	rfs4_file_t *fp = dsp->rds_finfo;
   1757 	open_delegation_type4 dtypewas;
   1758 
   1759 	rfs4_dbe_lock(fp->rf_dbe);
   1760 
   1761 	/* nothing to do if no longer on list */
   1762 	if (!list_link_active(&dsp->rds_node)) {
   1763 		rfs4_dbe_unlock(fp->rf_dbe);
   1764 		return;
   1765 	}
   1766 
   1767 	/* Remove state from recall list */
   1768 	list_remove(&fp->rf_delegstatelist, dsp);
   1769 
   1770 	if (list_is_empty(&fp->rf_delegstatelist)) {
   1771 		dtypewas = fp->rf_dinfo.rd_dtype;
   1772 		fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
   1773 		rfs4_dbe_cv_broadcast(fp->rf_dbe);
   1774 
   1775 		/* if file system was unshared, the vp will be NULL */
   1776 		if (fp->rf_vp != NULL) {
   1777 			/*
   1778 			 * Once a delegation is no longer held by any client,
   1779 			 * the monitor is uninstalled.  At this point, the
   1780 			 * client must send OPEN otw, so we don't need the
   1781 			 * reference on the vnode anymore.  The open
   1782 			 * downgrade removes the reference put on earlier.
   1783 			 */
   1784 			if (dtypewas == OPEN_DELEGATE_READ) {
   1785 				(void) fem_uninstall(fp->rf_vp, deleg_rdops,
   1786 				    (void *)fp);
   1787 				vn_open_downgrade(fp->rf_vp, FREAD);
   1788 			} else if (dtypewas == OPEN_DELEGATE_WRITE) {
   1789 				(void) fem_uninstall(fp->rf_vp, deleg_wrops,
   1790 				    (void *)fp);
   1791 				vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
   1792 			}
   1793 		}
   1794 	}
   1795 
   1796 	switch (dsp->rds_dtype) {
   1797 	case OPEN_DELEGATE_READ:
   1798 		fp->rf_dinfo.rd_rdgrants--;
   1799 		break;
   1800 	case OPEN_DELEGATE_WRITE:
   1801 		fp->rf_dinfo.rd_wrgrants--;
   1802 		break;
   1803 	default:
   1804 		break;
   1805 	}
   1806 
   1807 	/* used in the policy decision */
   1808 	fp->rf_dinfo.rd_time_returned = gethrestime_sec();
   1809 
   1810 	/*
   1811 	 * reset the time_recalled field so future delegations are not
   1812 	 * accidentally revoked
   1813 	 */
   1814 	if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
   1815 		fp->rf_dinfo.rd_time_recalled = 0;
   1816 
   1817 	rfs4_dbe_unlock(fp->rf_dbe);
   1818 
   1819 	rfs4_dbe_lock(dsp->rds_dbe);
   1820 
   1821 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
   1822 
   1823 	if (revoked == TRUE)
   1824 		dsp->rds_time_revoked = gethrestime_sec();
   1825 
   1826 	rfs4_dbe_invalidate(dsp->rds_dbe);
   1827 
   1828 	rfs4_dbe_unlock(dsp->rds_dbe);
   1829 
   1830 	if (revoked == TRUE) {
   1831 		rfs4_dbe_lock(dsp->rds_client->rc_dbe);
   1832 		dsp->rds_client->rc_deleg_revoked++;	/* observability */
   1833 		rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
   1834 	}
   1835 }
   1836 
   1837 static void
   1838 rfs4_revoke_file(rfs4_file_t *fp)
   1839 {
   1840 	rfs4_deleg_state_t *dsp;
   1841 
   1842 	/*
   1843 	 * The lock for rfs4_file_t must be held when traversing the
   1844 	 * delegation list but that lock needs to be released to call
   1845 	 * rfs4_return_deleg()
   1846 	 */
   1847 	rfs4_dbe_lock(fp->rf_dbe);
   1848 	while (dsp = list_head(&fp->rf_delegstatelist)) {
   1849 		rfs4_dbe_hold(dsp->rds_dbe);
   1850 		rfs4_dbe_unlock(fp->rf_dbe);
   1851 		rfs4_return_deleg(dsp, TRUE);
   1852 		rfs4_deleg_state_rele(dsp);
   1853 		rfs4_dbe_lock(fp->rf_dbe);
   1854 	}
   1855 	rfs4_dbe_unlock(fp->rf_dbe);
   1856 }
   1857 
   1858 /*
   1859  * A delegation is assumed to be present on the file associated with
   1860  * "sp".  Check to see if the delegation matches is associated with
   1861  * the same client as referenced by "sp".  If it is not, TRUE is
   1862  * returned.  If the delegation DOES match the client (or no
   1863  * delegation is present), return FALSE.
   1864  * Assume the state entry and file entry are locked.
   1865  */
   1866 bool_t
   1867 rfs4_is_deleg(rfs4_state_t *sp)
   1868 {
   1869 	rfs4_deleg_state_t *dsp;
   1870 	rfs4_file_t *fp = sp->rs_finfo;
   1871 	rfs4_client_t *cp = sp->rs_owner->ro_client;
   1872 
   1873 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
   1874 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
   1875 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
   1876 		if (cp != dsp->rds_client) {
   1877 			return (TRUE);
   1878 		}
   1879 	}
   1880 	return (FALSE);
   1881 }
   1882 
   1883 void
   1884 rfs4_disable_delegation(void)
   1885 {
   1886 	mutex_enter(&rfs4_deleg_lock);
   1887 	rfs4_deleg_disabled++;
   1888 	mutex_exit(&rfs4_deleg_lock);
   1889 }
   1890 
   1891 void
   1892 rfs4_enable_delegation(void)
   1893 {
   1894 	mutex_enter(&rfs4_deleg_lock);
   1895 	ASSERT(rfs4_deleg_disabled > 0);
   1896 	rfs4_deleg_disabled--;
   1897 	mutex_exit(&rfs4_deleg_lock);
   1898 }
   1899 
   1900 void
   1901 rfs4_mon_hold(void *arg)
   1902 {
   1903 	rfs4_file_t *fp = arg;
   1904 
   1905 	rfs4_dbe_hold(fp->rf_dbe);
   1906 }
   1907 
   1908 void
   1909 rfs4_mon_rele(void *arg)
   1910 {
   1911 	rfs4_file_t *fp = arg;
   1912 
   1913 	rfs4_dbe_rele_nolock(fp->rf_dbe);
   1914 }
   1915