1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <rpc/auth.h> 28 #include <rpc/clnt.h> 29 #include <nfs/nfs4_kprot.h> 30 #include <nfs/nfs4.h> 31 #include <nfs/lm.h> 32 #include <sys/cmn_err.h> 33 #include <sys/disp.h> 34 #include <sys/sdt.h> 35 36 #include <sys/pathname.h> 37 38 #include <sys/strsubr.h> 39 #include <sys/ddi.h> 40 41 #include <sys/vnode.h> 42 #include <sys/sdt.h> 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <sys/sdt.h> 47 48 #define MAX_READ_DELEGATIONS 5 49 50 static int rfs4_max_setup_cb_tries = 5; 51 52 #ifdef DEBUG 53 static int rfs4_test_cbgetattr_fail = 0; 54 int rfs4_cb_null; 55 int rfs4_cb_debug; 56 int rfs4_deleg_debug; 57 #endif 58 59 int mds_cbrecall_no_session = 0; 60 61 static void rfs4_recall_file(rfs4_file_t *, bool_t, rfs4_client_t *); 62 static void rfs4_revoke_deleg(rfs4_deleg_state_t *); 63 static void rfs41_revoke_deleg(rfs4_deleg_state_t *); 64 static void rfs4_revoke_file(rfs4_file_t *); 65 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 66 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 67 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 68 static rfs4_deleg_state_t *rfs4_deleg_state(struct compound_state *, 69 rfs4_state_t *, open_delegation_type4, int *); 70 71 /* 72 * Convert a universal address to an transport specific 73 * address using inet_pton. 74 */ 75 int 76 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 77 { 78 int dots = 0, i, j, len, k; 79 unsigned char c; 80 in_port_t port = 0; 81 82 len = strlen(ua); 83 84 for (i = len-1; i >= 0; i--) { 85 86 if (ua[i] == '.') 87 dots++; 88 89 if (dots == 2) { 90 91 ua[i] = '\0'; 92 /* 93 * We use k to remember were to stick '.' back, since 94 * ua was kmem_allocateded from the pool len+1. 95 */ 96 k = i; 97 if (inet_pton(af, ua, ap) == 1) { 98 99 c = 0; 100 101 for (j = i+1; j < len; j++) { 102 if (ua[j] == '.') { 103 port = c << 8; 104 c = 0; 105 } else if (ua[j] >= '0' && 106 ua[j] <= '9') { 107 c *= 10; 108 c += ua[j] - '0'; 109 } else { 110 ua[k] = '.'; 111 return (EINVAL); 112 } 113 } 114 port += c; 115 116 117 /* reset to network order */ 118 if (af == AF_INET) { 119 *(uint32_t *)ap = 120 htonl(*(uint32_t *)ap); 121 *pp = htons(port); 122 } else { 123 int ix; 124 uint16_t *sap; 125 126 for (sap = ap, ix = 0; ix < 127 sizeof (struct in6_addr) / 128 sizeof (uint16_t); ix++) 129 sap[ix] = htons(sap[ix]); 130 131 *pp = htons(port); 132 } 133 134 ua[k] = '.'; 135 return (0); 136 } else { 137 ua[k] = '.'; 138 return (EINVAL); 139 } 140 } 141 } 142 143 return (EINVAL); 144 } 145 146 /* 147 * Update the delegation policy with the 148 * value of "new_policy" 149 */ 150 void 151 rfs4_set_deleg_policy(nfs_server_instance_t *instp, 152 srv_deleg_policy_t new_policy) 153 { 154 rw_enter(&instp->deleg_policy_lock, RW_WRITER); 155 instp->deleg_policy = new_policy; 156 rw_exit(&instp->deleg_policy_lock); 157 } 158 159 void 160 rfs4_hold_deleg_policy(nfs_server_instance_t *instp) 161 { 162 rw_enter(&instp->deleg_policy_lock, RW_READER); 163 } 164 165 void 166 rfs4_rele_deleg_policy(nfs_server_instance_t *instp) 167 { 168 rw_exit(&instp->deleg_policy_lock); 169 } 170 171 172 /* 173 * This free function is to be used when the client struct is being 174 * released and nothing at all is needed of the callback info any 175 * longer. 176 */ 177 void 178 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 179 { 180 char *addr = cbp->cb_callback.cb_location.r_addr; 181 char *netid = cbp->cb_callback.cb_location.r_netid; 182 183 /* Free old address if any */ 184 185 if (addr) 186 kmem_free(addr, strlen(addr) + 1); 187 if (netid) 188 kmem_free(netid, strlen(netid) + 1); 189 190 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 191 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 192 193 if (addr) 194 kmem_free(addr, strlen(addr) + 1); 195 if (netid) 196 kmem_free(netid, strlen(netid) + 1); 197 198 if (cbp->cb_chc_free) { 199 rfs4_cb_chflush(cbp); 200 } 201 } 202 203 /* 204 * The server uses this to check the callback path supplied by the 205 * client. The callback connection is marked "in progress" while this 206 * work is going on and then eventually marked either OK or FAILED. 207 * This work can be done as part of a separate thread and at the end 208 * of this the thread will exit or it may be done such that the caller 209 * will continue with other work. 210 */ 211 static void 212 rfs4_do_cb_null(rfs4_client_t *cp) 213 { 214 struct timeval tv; 215 CLIENT *ch; 216 rfs4_cbstate_t newstate; 217 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 218 219 mutex_enter(cbp->cb_lock); 220 /* If another thread is doing CB_NULL RPC then return */ 221 if (cbp->cb_nullcaller == TRUE) { 222 mutex_exit(cbp->cb_lock); 223 rfs4_client_rele(cp); 224 return; 225 } 226 227 /* Mark the cbinfo as having a thread in the NULL callback */ 228 cbp->cb_nullcaller = TRUE; 229 230 /* 231 * Are there other threads still using the cbinfo client 232 * handles? If so, this thread must wait before going and 233 * mucking aroiund with the callback information 234 */ 235 while (cbp->cb_refcnt != 0) 236 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 237 238 /* 239 * This thread itself may find that new callback info has 240 * arrived and is set up to handle this case and redrive the 241 * call to the client's callback server. 242 */ 243 retry: 244 if (cbp->cb_newer.cb_new == TRUE && 245 cbp->cb_newer.cb_confirmed == TRUE) { 246 char *addr = cbp->cb_callback.cb_location.r_addr; 247 char *netid = cbp->cb_callback.cb_location.r_netid; 248 249 /* 250 * Free the old stuff if it exists; may be the first 251 * time through this path 252 */ 253 if (addr) 254 kmem_free(addr, strlen(addr) + 1); 255 if (netid) 256 kmem_free(netid, strlen(netid) + 1); 257 258 /* Move over the addr/netid */ 259 cbp->cb_callback.cb_location.r_addr = 260 cbp->cb_newer.cb_callback.cb_location.r_addr; 261 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 262 cbp->cb_callback.cb_location.r_netid = 263 cbp->cb_newer.cb_callback.cb_location.r_netid; 264 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 265 266 /* Get the program number */ 267 cbp->cb_callback.cb_program = 268 cbp->cb_newer.cb_callback.cb_program; 269 cbp->cb_newer.cb_callback.cb_program = 0; 270 271 /* Don't forget the protocol's "cb_ident" field */ 272 cbp->cb_ident = cbp->cb_newer.cb_ident; 273 cbp->cb_newer.cb_ident = 0; 274 275 /* no longer new */ 276 cbp->cb_newer.cb_new = FALSE; 277 cbp->cb_newer.cb_confirmed = FALSE; 278 279 /* get rid of the old client handles that may exist */ 280 rfs4_cb_chflush(cbp); 281 282 cbp->cb_state = CB_NONE; 283 cbp->cb_timefailed = 0; /* reset the clock */ 284 cbp->cb_notified_of_cb_path_down = TRUE; 285 } 286 287 if (cbp->cb_state != CB_NONE) { 288 cv_broadcast(cbp->cb_cv); /* let the others know */ 289 cbp->cb_nullcaller = FALSE; 290 mutex_exit(cbp->cb_lock); 291 rfs4_client_rele(cp); 292 return; 293 } 294 295 /* mark rfs4_client_t as CALLBACK NULL in progress */ 296 cbp->cb_state = CB_INPROG; 297 mutex_exit(cbp->cb_lock); 298 299 /* get/generate a client handle */ 300 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 301 mutex_enter(cbp->cb_lock); 302 cbp->cb_state = CB_BAD; 303 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 304 goto retry; 305 } 306 307 308 tv.tv_sec = 30; 309 tv.tv_usec = 0; 310 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 311 newstate = CB_BAD; 312 } else { 313 newstate = CB_OK; 314 #ifdef DEBUG 315 rfs4_cb_null++; 316 #endif 317 } 318 319 /* Check to see if the client has specified new callback info */ 320 mutex_enter(cbp->cb_lock); 321 rfs4_cb_freech(cbp, ch, TRUE); 322 if (cbp->cb_newer.cb_new == TRUE && 323 cbp->cb_newer.cb_confirmed == TRUE) { 324 goto retry; /* give the CB_NULL another chance */ 325 } 326 327 cbp->cb_state = newstate; 328 if (cbp->cb_state == CB_BAD) 329 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 330 331 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 332 cbp->cb_nullcaller = FALSE; 333 mutex_exit(cbp->cb_lock); 334 335 rfs4_client_rele(cp); 336 } 337 338 /* 339 * Given a client struct, inspect the callback info to see if the 340 * callback path is up and available. 341 * 342 * If new callback path is available and no one has set it up then 343 * try to set it up. If setup is not successful after 5 tries (5 secs) 344 * then gives up and returns NULL. 345 * 346 * If callback path is being initialized, then wait for the CB_NULL RPC 347 * call to occur. 348 */ 349 static rfs4_cbinfo_t * 350 rfs4_cbinfo_hold(rfs4_client_t *cp) 351 { 352 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 353 int retries = 0; 354 355 mutex_enter(cbp->cb_lock); 356 357 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 358 /* 359 * Looks like a new callback path may be available and 360 * noone has set it up. 361 */ 362 mutex_exit(cbp->cb_lock); 363 rfs4_dbe_hold(cp->rc_dbe); 364 rfs4_do_cb_null(cp); /* caller will release client hold */ 365 366 mutex_enter(cbp->cb_lock); 367 /* 368 * If callback path is no longer new, or it's being setup 369 * then stop and wait for it to be done. 370 */ 371 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 372 break; 373 mutex_exit(cbp->cb_lock); 374 375 if (++retries >= rfs4_max_setup_cb_tries) 376 return (NULL); 377 delay(hz); 378 mutex_enter(cbp->cb_lock); 379 } 380 381 /* Is there a thread working on doing the CB_NULL RPC? */ 382 if (cbp->cb_nullcaller == TRUE) 383 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 384 385 /* If the callback path is not okay (up and running), just quit */ 386 if (cbp->cb_state != CB_OK) { 387 mutex_exit(cbp->cb_lock); 388 return (NULL); 389 } 390 391 /* Let someone know we are using the current callback info */ 392 cbp->cb_refcnt++; 393 mutex_exit(cbp->cb_lock); 394 return (cbp); 395 } 396 397 /* 398 * The caller is done with the callback info. It may be that the 399 * caller's RPC failed and the NFSv4 client has actually provided new 400 * callback information. If so, let the caller know so they can 401 * advantage of this and maybe retry the RPC that originally failed. 402 */ 403 static int 404 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 405 { 406 int cb_new = FALSE; 407 408 mutex_enter(cbp->cb_lock); 409 410 /* The caller gets a chance to mark the callback info as bad */ 411 if (newstate != CB_NOCHANGE) 412 cbp->cb_state = newstate; 413 if (newstate == CB_FAILED) { 414 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 415 cbp->cb_notified_of_cb_path_down = FALSE; 416 } 417 418 cbp->cb_refcnt--; /* no longer using the information */ 419 420 /* 421 * A thread may be waiting on this one to finish and if so, 422 * let it know that it is okay to do the CB_NULL to the 423 * client's callback server. 424 */ 425 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 426 cv_broadcast(cbp->cb_cv_nullcaller); 427 428 /* 429 * If this is the last thread to use the callback info and 430 * there is new callback information to try and no thread is 431 * there ready to do the CB_NULL, then return true to teh 432 * caller so they can do the CB_NULL 433 */ 434 if (cbp->cb_refcnt == 0 && 435 cbp->cb_nullcaller == FALSE && 436 cbp->cb_newer.cb_new == TRUE && 437 cbp->cb_newer.cb_confirmed == TRUE) 438 cb_new = TRUE; 439 440 mutex_exit(cbp->cb_lock); 441 442 return (cb_new); 443 } 444 445 /* 446 * Common v4 routine to init a callback client handle 447 */ 448 449 static CLIENT * 450 cbch_init(struct netbuf *nb, uint32_t cb_program) 451 { 452 struct knetconfig knc; 453 vnode_t *vp; 454 char *devnam; 455 int err = 0; 456 CLIENT *ch = NULL; 457 struct sockaddr *sa; 458 459 sa = (struct sockaddr *)nb->buf; 460 461 if (sa->sa_family == AF_INET) { 462 knc.knc_semantics = NC_TPI_COTS; 463 knc.knc_protofmly = "inet"; 464 knc.knc_proto = "tcp"; 465 devnam = "/dev/tcp"; 466 } else if (sa->sa_family == AF_INET6) { 467 knc.knc_semantics = NC_TPI_COTS; 468 knc.knc_protofmly = "inet6"; 469 knc.knc_proto = "tcp"; 470 devnam = "/dev/tcp6"; 471 } else { 472 DTRACE_PROBE2(nfss__cb__debug, char *, 473 "cbch_init: unknown transport family", int, sa->sa_family); 474 475 goto cb_init_out; 476 } 477 478 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 479 480 DTRACE_PROBE2(nfss__cb__debug, char *, 481 "cbch_init: lookupname failed", int, err); 482 483 goto cb_init_out; 484 } 485 486 if (vp->v_type != VCHR) { 487 488 DTRACE_PROBE2(nfss__cb__debug, char *, 489 "cbch_init: v_type not of type VCHR", char *, devnam); 490 VN_RELE(vp); 491 goto cb_init_out; 492 } 493 494 knc.knc_rdev = vp->v_rdev; 495 496 VN_RELE(vp); 497 498 if (err = clnt_tli_kcreate(&knc, nb, cb_program, 499 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 500 501 DTRACE_PROBE2(nfss__cb__debug, char *, 502 "cbch_init: clnt_tli_kcreate failed", int, err); 503 ch = NULL; 504 } 505 506 cb_init_out: 507 return (ch); 508 } 509 510 /* 511 * Given the information in the callback info struct, create a client 512 * handle that can be used by the server for its callback path. 513 */ 514 static CLIENT * 515 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 516 { 517 int useresvport = 0; 518 int af; 519 int size; 520 netaddr4 *naddr; 521 void *addr; 522 void *taddr; 523 in_port_t *pp; 524 struct sockaddr_in addr4; 525 struct sockaddr_in6 addr6; 526 struct netbuf nb; 527 CLIENT *ch = NULL; 528 529 DTRACE_PROBE2(nfss__cb__debug, char *, 530 "rfs4_cbch_init: entry cbp:", rfs4_cbinfo_t *, cbp); 531 532 mutex_enter(cbp->cb_lock); 533 534 naddr = (netaddr4 *)&cbp->cb_callback.cb_location; 535 536 if (naddr->na_r_netid == NULL || naddr->na_r_addr == NULL) { 537 goto ch_out; 538 } 539 540 if (strcmp(naddr->na_r_netid, "tcp") == 0) { 541 af = AF_INET; 542 size = sizeof (addr4); 543 bzero(&addr4, size); 544 addr4.sin_family = (sa_family_t)af; 545 addr = &addr4.sin_addr; 546 pp = &addr4.sin_port; 547 taddr = &addr4; 548 } else if (strcmp(naddr->na_r_netid, "tcp6") == 0) { 549 af = AF_INET6; 550 size = sizeof (addr6); 551 bzero(&addr6, size); 552 addr6.sin6_family = (sa_family_t)af; 553 addr = &addr6.sin6_addr; 554 pp = &addr6.sin6_port; 555 taddr = &addr6; 556 } else { 557 DTRACE_PROBE2(nfss__cb__debug, char *, 558 "rfs4_cbch_init: bad transport", char *, 559 cbp->cb_callback.cb_location.r_netid); 560 goto ch_out; 561 } 562 563 if (uaddr2sockaddr(af, naddr->na_r_addr, addr, pp)) { 564 565 DTRACE_PROBE2(nfss__cb__debug, char *, 566 "rfs4_cbch_init: malformed universal addr: ", 567 void *, naddr->na_r_addr); 568 569 goto ch_out; 570 } 571 572 573 nb.maxlen = nb.len = size; 574 nb.buf = (char *)taddr; 575 576 ch = cbch_init(&nb, cbp->cb_callback.cb_program); 577 578 /* turn off reserved port usage */ 579 if (ch != NULL) 580 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, 581 (char *)&useresvport); 582 ch_out: 583 mutex_exit(cbp->cb_lock); 584 return (ch); 585 } 586 587 /* 588 * Iterate over the client handle cache and 589 * destroy it. 590 */ 591 static void 592 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 593 { 594 CLIENT *ch; 595 596 while (cbp->cb_chc_free) { 597 cbp->cb_chc_free--; 598 ch = cbp->cb_chc[cbp->cb_chc_free]; 599 cbp->cb_chc[cbp->cb_chc_free] = NULL; 600 if (ch) { 601 if (ch->cl_auth) 602 auth_destroy(ch->cl_auth); 603 clnt_destroy(ch); 604 } 605 } 606 } 607 608 /* 609 * Return a client handle, either from a the small 610 * rfs4_client_t cache or one that we just created. 611 */ 612 static CLIENT * 613 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 614 { 615 CLIENT *cbch = NULL; 616 uint32_t zilch = 0; 617 618 mutex_enter(cbp->cb_lock); 619 620 if (cbp->cb_chc_free) { 621 cbp->cb_chc_free--; 622 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 623 mutex_exit(cbp->cb_lock); 624 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 625 return (cbch); 626 } 627 628 mutex_exit(cbp->cb_lock); 629 630 /* none free so make it now */ 631 cbch = rfs4_cbch_init(cbp); 632 633 return (cbch); 634 } 635 636 /* 637 * Return the client handle to the small cache or 638 * destroy it. 639 */ 640 static void 641 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 642 { 643 if (lockheld == FALSE) 644 mutex_enter(cbp->cb_lock); 645 646 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 647 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 648 if (lockheld == FALSE) 649 mutex_exit(cbp->cb_lock); 650 return; 651 } 652 if (lockheld == FALSE) 653 mutex_exit(cbp->cb_lock); 654 655 /* 656 * cache maxed out of free entries, obliterate 657 * this client handle, destroy it, throw it away. 658 */ 659 if (ch->cl_auth) 660 auth_destroy(ch->cl_auth); 661 clnt_destroy(ch); 662 } 663 664 static CLIENT * 665 rfs41_cb_chinit(uint32_t cbprog) 666 { 667 CLIENT *ch; 668 struct knetconfig knc; 669 int err; 670 671 /* 672 * The dest addr and parts of knc fields passed into 673 * clnt_tli_kcreate() are dummy. The connection is 674 * picked up later and RPC does not really use it to 675 * create connections for 4.1 callbacks. 676 */ 677 678 bzero(&knc, sizeof (struct knetconfig)); 679 680 /* 681 * knc_semantics is important to choose the 682 * right transport type. 683 */ 684 knc.knc_semantics = NC_TPI_COTS; 685 knc.knc_protofmly = "inet"; 686 knc.knc_proto = "tcp"; 687 688 if (err = clnt_tli_kcreate(&knc, 0, cbprog, NFS_CB, 0, 0, 689 curthread->t_cred, &ch)) { 690 DTRACE_PROBE2(nfss__cb__debug, char *, 691 "rfs41_cbch_init: clnt_tli_kcreate failed", int, err); 692 ch = NULL; 693 } 694 if (ch != NULL) 695 CLNT_CONTROL(ch, CLSET_CBCLIENT, NULL); 696 return (ch); 697 } 698 699 CLIENT * 700 rfs41_cb_getch(mds_session_t *sp) 701 { 702 CLIENT *cbch = NULL; 703 sess_channel_t *bcp; 704 sess_bcsd_t *bsdp; 705 706 rfs4_dbe_lock(sp->sn_dbe); 707 bcp = SNTOBC(sp); 708 rfs4_dbe_unlock(sp->sn_dbe); 709 710 rw_enter(&bcp->cn_lock, RW_READER); 711 bsdp = CTOBSD(bcp); 712 713 rw_enter(&bsdp->bsd_rwlock, RW_WRITER); 714 if (bsdp->bsd_ch_free) { 715 bsdp->bsd_ch_free--; 716 cbch = bsdp->bsd_clnt[bsdp->bsd_ch_free]; 717 } else { 718 cbch = rfs41_cb_chinit(sp->sn_bc.progno); 719 CLNT_CONTROL(cbch, CLSET_TAG, (void *)sp->sn_sessid); 720 } 721 722 rw_exit(&bsdp->bsd_rwlock); 723 rw_exit(&bcp->cn_lock); 724 return (cbch); 725 } 726 727 void 728 rfs41_cb_freech(mds_session_t *sp, CLIENT *ch) 729 { 730 sess_channel_t *bcp; 731 sess_bcsd_t *bsdp; 732 733 rfs4_dbe_lock(sp->sn_dbe); 734 bcp = SNTOBC(sp); 735 rfs4_dbe_unlock(sp->sn_dbe); 736 737 rw_enter(&bcp->cn_lock, RW_READER); 738 bsdp = CTOBSD(bcp); 739 740 rw_enter(&bsdp->bsd_rwlock, RW_WRITER); 741 if (bsdp->bsd_ch_free < RFS4_CBCH_MAX) { 742 bsdp->bsd_clnt[bsdp->bsd_ch_free++] = ch; 743 rw_exit(&bsdp->bsd_rwlock); 744 rw_exit(&bcp->cn_lock); 745 return; 746 } 747 748 rw_exit(&bsdp->bsd_rwlock); 749 rw_exit(&bcp->cn_lock); 750 751 /* 752 * cache maxed out of free entries, obliterate 753 * this client handle, destroy it, throw it away. 754 */ 755 if (ch->cl_auth) 756 auth_destroy(ch->cl_auth); 757 clnt_destroy(ch); 758 } 759 760 /* 761 * Iterate over the session's client handle cache and 762 * destroy it. 763 */ 764 void 765 rfs41_cb_chflush(mds_session_t *sp) 766 { 767 CLIENT *ch; 768 sess_channel_t *bcp; 769 sess_bcsd_t *bsdp; 770 771 rfs4_dbe_lock(sp->sn_dbe); 772 bcp = SNTOBC(sp); 773 rfs4_dbe_unlock(sp->sn_dbe); 774 775 rw_enter(&bcp->cn_lock, RW_READER); 776 bsdp = CTOBSD(bcp); 777 778 rw_enter(&bsdp->bsd_rwlock, RW_WRITER); 779 780 while (bsdp->bsd_ch_free) { 781 bsdp->bsd_ch_free--; 782 ch = bsdp->bsd_clnt[bsdp->bsd_ch_free]; 783 bsdp->bsd_clnt[bsdp->bsd_ch_free] = NULL; 784 if (ch) { 785 if (ch->cl_auth) 786 auth_destroy(ch->cl_auth); 787 clnt_destroy(ch); 788 } 789 } 790 791 rw_exit(&bsdp->bsd_rwlock); 792 } 793 794 /* 795 * With the supplied callback information - initialize the client 796 * callback data. If there is a callback in progress, save the 797 * callback info so that a thread can pick it up in the future. 798 */ 799 void 800 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 801 { 802 char *addr = NULL; 803 char *netid = NULL; 804 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 805 size_t len; 806 807 /* Set the call back for the client */ 808 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 809 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 810 len = strlen(cb->cb_location.r_addr) + 1; 811 addr = kmem_alloc(len, KM_SLEEP); 812 bcopy(cb->cb_location.r_addr, addr, len); 813 len = strlen(cb->cb_location.r_netid) + 1; 814 netid = kmem_alloc(len, KM_SLEEP); 815 bcopy(cb->cb_location.r_netid, netid, len); 816 } 817 /* ready to save the new information but first free old, if exists */ 818 mutex_enter(cbp->cb_lock); 819 820 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 821 822 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 823 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 824 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 825 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 826 827 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 828 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 829 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 830 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 831 832 cbp->cb_newer.cb_ident = cb_ident; 833 834 if (addr && *addr && netid && *netid) { 835 cbp->cb_newer.cb_new = TRUE; 836 cbp->cb_newer.cb_confirmed = FALSE; 837 } else { 838 cbp->cb_newer.cb_new = FALSE; 839 cbp->cb_newer.cb_confirmed = FALSE; 840 } 841 842 mutex_exit(cbp->cb_lock); 843 } 844 845 /* 846 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 847 * information may have been provided on SETCLIENTID and this call 848 * marks that information as confirmed and then starts a thread to 849 * test the callback path. 850 */ 851 void 852 rfs4_deleg_cb_check(rfs4_client_t *cp) 853 { 854 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 855 return; 856 857 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 858 859 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 860 861 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 862 minclsyspri); 863 } 864 865 static void 866 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 867 { 868 CB_RECALL4args *rec_argp; 869 870 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 871 if (rec_argp->fh.nfs_fh4_val) 872 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 873 } 874 875 /* XXX - this only works for one entry in the referring_call_list4 - rick */ 876 void 877 rfs41args_cb_sequence_free(nfs_cb_argop4 *argop) 878 { 879 CB_SEQUENCE4args *ap; 880 referring_call_list4 *rp; 881 uint_t len; 882 883 ap = &argop->nfs_cb_argop4_u.opcbsequence; 884 if ((rp = ap->csa_rcall_lval) != NULL) { 885 if (rp->rcl_val != NULL) { 886 len = rp->rcl_len; 887 kmem_free(rp->rcl_val, len * sizeof (referring_call4)); 888 rp->rcl_val = NULL; 889 } 890 len = ap->csa_rcall_llen; 891 kmem_free(rp, len * sizeof (referring_call_list4)); 892 ap->csa_rcall_lval = NULL; 893 } 894 } 895 896 /* ARGSUSED */ 897 static void 898 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 899 { 900 CB_GETATTR4args *argp; 901 902 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 903 if (argp->fh.nfs_fh4_val) 904 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 905 } 906 907 void 908 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 909 { 910 int i, arglen; 911 nfs_cb_argop4 *argop; 912 913 /* 914 * First free any special args alloc'd for specific ops. 915 */ 916 arglen = args->array_len; 917 argop = args->array; 918 for (i = 0; i < arglen; i++, argop++) { 919 920 switch (argop->argop) { 921 case OP_CB_SEQUENCE: 922 rfs41args_cb_sequence_free(argop); 923 break; 924 925 case OP_CB_RECALL: 926 rfs4args_cb_recall_free(argop); 927 break; 928 929 case OP_CB_GETATTR: 930 rfs4args_cb_getattr_free(argop); 931 break; 932 933 case OP_CB_LAYOUTRECALL: 934 break; 935 936 default: 937 return; 938 } 939 } 940 941 if (args->tag.utf8string_len > 0) 942 UTF8STRING_FREE(args->tag) 943 944 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 945 if (resp) 946 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 947 } 948 949 slotid4 950 svc_slot_maxslot(mds_session_t *sp) 951 { 952 slotid4 ms; 953 sess_channel_t *bcp; 954 sess_bcsd_t *bsdp; 955 956 rfs4_dbe_lock(sp->sn_dbe); 957 bcp = SNTOBC(sp); 958 rfs4_dbe_unlock(sp->sn_dbe); 959 960 rw_enter(&bcp->cn_lock, RW_READER); 961 if ((bsdp = CTOBSD(bcp)) == NULL) 962 cmn_err(CE_PANIC, "svc_slot_maxslot: BC Specific Data Not Set"); 963 964 rw_enter(&bsdp->bsd_rwlock, RW_READER); 965 slot_table_query(bsdp->bsd_stok, SLT_MAXSLOT, &ms); 966 rw_exit(&bsdp->bsd_rwlock); 967 968 rw_exit(&bcp->cn_lock); 969 return (ms); 970 } 971 972 /* 973 * Server-side slot allocations from BC's slot table. 974 */ 975 slot_ent_t * 976 svc_slot_alloc(mds_session_t *sp) 977 { 978 slot_ent_t *p; 979 sess_channel_t *bcp; 980 sess_bcsd_t *bsdp; 981 982 rfs4_dbe_lock(sp->sn_dbe); 983 bcp = SNTOBC(sp); 984 rfs4_dbe_unlock(sp->sn_dbe); 985 986 rw_enter(&bcp->cn_lock, RW_READER); 987 if ((bsdp = CTOBSD(bcp)) == NULL) 988 cmn_err(CE_PANIC, "svc_slot_alloc: BC Specific Data Not Set"); 989 990 rw_enter(&bsdp->bsd_rwlock, RW_READER); 991 (void) slot_alloc(bsdp->bsd_stok, SLT_SLEEP, &p); 992 rw_exit(&bsdp->bsd_rwlock); 993 994 rw_exit(&bcp->cn_lock); 995 return (p); 996 } 997 998 /* 999 * Server-side slot allocations from BC's slot table. 1000 */ 1001 void 1002 svc_slot_free(mds_session_t *sp, slot_ent_t *p) 1003 { 1004 sess_channel_t *bcp; 1005 sess_bcsd_t *bsdp; 1006 1007 ASSERT(sp != NULL); 1008 ASSERT(p != NULL); 1009 rfs4_dbe_lock(sp->sn_dbe); 1010 bcp = SNTOBC(sp); 1011 rfs4_dbe_unlock(sp->sn_dbe); 1012 1013 rw_enter(&bcp->cn_lock, RW_READER); 1014 if ((bsdp = CTOBSD(bcp)) == NULL) 1015 cmn_err(CE_PANIC, "svc_slot_free: BC Specific Data Not Set"); 1016 1017 rw_enter(&bsdp->bsd_rwlock, RW_READER); 1018 slot_free(bsdp->bsd_stok, p); 1019 rw_exit(&bsdp->bsd_rwlock); 1020 1021 rw_exit(&bcp->cn_lock); 1022 } 1023 1024 void 1025 svc_slot_cb_seqid(CB_COMPOUND4res *resp, slot_ent_t *p) 1026 { 1027 CB_SEQUENCE4res *rp; 1028 1029 if (resp == NULL || resp->array == NULL) 1030 return; 1031 1032 ASSERT(resp->array->resop == OP_CB_SEQUENCE); 1033 rp = &resp->array->nfs_cb_resop4_u.opcbsequence; 1034 if (rp->csr_status == NFS4_OK) { 1035 slot_incr_seq(p, 1); 1036 } 1037 } 1038 1039 /* 1040 * General callback routine for the server to the client. 1041 */ 1042 static enum clnt_stat 1043 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 1044 CB_COMPOUND4res *res, struct timeval timeout) 1045 { 1046 rfs4_cbinfo_t *cbp; 1047 CLIENT *ch; 1048 /* start with this in case cb_getch() fails */ 1049 enum clnt_stat stat = RPC_FAILED; 1050 1051 res->tag.utf8string_val = NULL; 1052 res->array = NULL; 1053 1054 retry: 1055 cbp = rfs4_cbinfo_hold(cp); 1056 if (cbp == NULL) 1057 return (stat); 1058 1059 /* get a client handle */ 1060 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 1061 /* 1062 * reset the cb_ident since it may have changed in 1063 * rfs4_cbinfo_hold() 1064 */ 1065 args->callback_ident = cbp->cb_ident; 1066 1067 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 1068 (caddr_t)args, xdr_CB_COMPOUND4res, 1069 (caddr_t)res, timeout); 1070 1071 /* free client handle */ 1072 rfs4_cb_freech(cbp, ch, FALSE); 1073 } 1074 1075 /* 1076 * If the rele says that there may be new callback info then 1077 * retry this sequence and it may succeed as a result of the 1078 * new callback path 1079 */ 1080 if (rfs4_cbinfo_rele(cbp, 1081 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 1082 goto retry; 1083 1084 return (stat); 1085 } 1086 1087 /* 1088 * Used by the NFSv4 server to get attributes for a file while 1089 * handling the case where a file has been write delegated. For the 1090 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 1091 * not undertaken. This call site is maintained in case the server is 1092 * updated in the future to handle write delegation space guarantees. 1093 */ 1094 nfsstat4 1095 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 1096 { 1097 1098 int error; 1099 1100 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 1101 return (puterrno4(error)); 1102 } 1103 1104 /* 1105 * This is used everywhere in the v2/v3 server to allow the 1106 * integration of all NFS versions and the support of delegation. For 1107 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 1108 * in the future to provide space guarantees for write delegations 1109 * then this call site should be expanded to interact with the client. 1110 */ 1111 int 1112 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 1113 { 1114 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 1115 } 1116 1117 /* 1118 * Place the actual cb_recall otw call to client. 1119 */ 1120 void 1121 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 1122 { 1123 CB_COMPOUND4args cb4_args; 1124 CB_COMPOUND4res cb4_res; 1125 CB_RECALL4args *rec_argp; 1126 CB_RECALL4res *rec_resp; 1127 nfs_cb_argop4 *argop; 1128 int numops; 1129 int argoplist_size; 1130 struct timeval timeout; 1131 nfs_fh4 *fhp; 1132 enum clnt_stat call_stat; 1133 1134 /* 1135 * set up the compound args 1136 */ 1137 numops = 1; /* CB_RECALL only */ 1138 1139 argoplist_size = numops * sizeof (nfs_cb_argop4); 1140 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 1141 argop->argop = OP_CB_RECALL; 1142 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 1143 1144 (void) str_to_utf8("cb_recall", &cb4_args.tag); 1145 cb4_args.minorversion = CB4_MINOR_v0; 1146 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 1147 cb4_args.array_len = numops; 1148 cb4_args.array = argop; 1149 1150 /* 1151 * fill in the args struct 1152 */ 1153 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 1154 rec_argp->truncate = trunc; 1155 1156 fhp = &dsp->rds_finfo->rf_filehandle; 1157 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 1158 fhp->nfs_fh4_len, KM_SLEEP); 1159 nfs_fh4_copy(fhp, &rec_argp->fh); 1160 1161 /* Keep track of when we did this for observability */ 1162 dsp->rds_time_recalled = gethrestime_sec(); 1163 1164 /* 1165 * Set up the timeout for the callback and make the actual call. 1166 * Timeout will be 80% of the lease period for this server. 1167 */ 1168 timeout.tv_sec = (dbe_to_instp(dsp->rds_dbe)->lease_period * 80) / 100; 1169 timeout.tv_usec = 0; 1170 1171 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 1172 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 1173 1174 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 1175 timeout); 1176 1177 rec_resp = (cb4_res.array_len == 0) ? NULL : 1178 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 1179 1180 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 1181 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 1182 1183 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 1184 rfs4_return_deleg(dsp, TRUE); 1185 } 1186 1187 rfs4freeargres(&cb4_args, &cb4_res); 1188 } 1189 1190 bool_t 1191 rfs41_file_still_delegated(rfs4_deleg_state_t *dsp) 1192 { 1193 rfs4_file_t *fp; 1194 1195 ASSERT(dsp != NULL); 1196 ASSERT(dsp->rds_finfo != NULL); 1197 fp = dsp->rds_finfo; 1198 1199 /* do we have a delegation on this file? */ 1200 rfs4_dbe_lock(fp->rf_dbe); 1201 if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) { /* check type */ 1202 rfs4_dbe_unlock(fp->rf_dbe); 1203 return (FALSE); 1204 } 1205 1206 /* check deleg cnt */ 1207 if (list_next(&fp->rf_delegstatelist, dsp) == NULL) { 1208 rfs4_dbe_unlock(fp->rf_dbe); 1209 return (FALSE); 1210 } 1211 rfs4_dbe_unlock(fp->rf_dbe); 1212 return (TRUE); 1213 } 1214 1215 void 1216 rfs41_cb_seq_rcl_args(CB_SEQUENCE4args *ap, rfs4_deleg_state_t *dsp) 1217 { 1218 referring_call_list4 *rp; 1219 referring_call4 *rcp; 1220 1221 ASSERT(ap != NULL); 1222 1223 /* construct one entry in referring_call_list4 */ 1224 ap->csa_rcall_llen = 1; 1225 rp = (referring_call_list4 *)kmem_zalloc(sizeof (referring_call_list4), 1226 KM_SLEEP); 1227 ap->csa_rcall_lval = rp; 1228 1229 /* construct one referring_call4 entry in list above */ 1230 rp->rcl_len = 1; 1231 rcp = (referring_call4 *)kmem_zalloc(sizeof (referring_call4), 1232 KM_SLEEP); 1233 rp->rcl_val = rcp; 1234 1235 /* set the necessary arg fields */ 1236 bcopy(&dsp->rds_rs.sessid, &rp->rcl_sessionid, sizeof (sessionid4)); 1237 rcp->rc_sequenceid = dsp->rds_rs.seqid; 1238 rcp->rc_slotid = dsp->rds_rs.slotno; 1239 } 1240 1241 void 1242 rfs41_cb_path_down(mds_session_t *sp, uint32_t sonly) 1243 { 1244 uint32_t cp_flag = SEQ4_STATUS_CB_PATH_DOWN; 1245 uint32_t sn_flag = SEQ4_STATUS_CB_PATH_DOWN_SESSION; 1246 uint32_t idx = log2(sn_flag); 1247 1248 ASSERT(sp != NULL); 1249 ASSERT(sp->sn_clnt != NULL); 1250 1251 /* NB - refcnt for both these bits == active cb connections */ 1252 1253 /* session */ 1254 sp->sn_seq4[idx].ba_sonly = sonly; 1255 rfs41_seq4_rele(&sp->sn_seq4, sn_flag); 1256 1257 /* clientid */ 1258 rfs41_seq4_rele(&sp->sn_clnt->rc_seq4, cp_flag); 1259 } 1260 1261 /* 1262 * Place the actual cb_recall otw call to client. (using slot_XXX api) 1263 */ 1264 /*ARGSUSED*/ 1265 void 1266 mds_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 1267 { 1268 CB_COMPOUND4args cb4_args; 1269 CB_COMPOUND4res cb4_res; 1270 CB_SEQUENCE4args *cbsap; 1271 CB_RECALL4args *cbrap; 1272 mds_session_t *sp; 1273 slot_ent_t *p; 1274 nfs_cb_argop4 *argops; 1275 int numops; 1276 int argoplist_size; 1277 struct timeval timeout; 1278 nfs_fh4 *fhp; 1279 enum clnt_stat call_stat = RPC_FAILED; 1280 int zilch = 0; 1281 CLIENT *ch; 1282 int rcl = 0; /* referring call list */ 1283 int retried = 0; 1284 uint32_t sc = 0; /* session ctxt */ 1285 1286 /* 1287 * get the session id 1288 */ 1289 sp = mds_findsession_by_clid(dbe_to_instp(dsp->rds_dbe), 1290 dsp->rds_client->rc_clientid); 1291 if (sp == NULL) { 1292 /* 1293 * this shouldn't ever happen. if it does, just 1294 * increment a counter for now and return. 1295 */ 1296 mds_cbrecall_no_session++; 1297 return; 1298 } 1299 1300 /* 1301 * set up the compound args 1302 */ 1303 numops = 2; /* CB_SEQUENCE + CB_RECALL */ 1304 argoplist_size = numops * sizeof (nfs_cb_argop4); 1305 argops = kmem_zalloc(argoplist_size, KM_SLEEP); 1306 1307 argops[0].argop = OP_CB_SEQUENCE; 1308 cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence; 1309 1310 argops[1].argop = OP_CB_RECALL; 1311 cbrap = &argops[1].nfs_cb_argop4_u.opcbrecall; 1312 1313 (void) str_to_utf8("mds_cb_recall", &cb4_args.tag); 1314 cb4_args.minorversion = CB4_MINOR_v1; 1315 1316 cb4_args.callback_ident = sp->sn_bc.progno; 1317 cb4_args.array_len = numops; 1318 cb4_args.array = argops; 1319 1320 cb4_res.tag.utf8string_val = NULL; 1321 cb4_res.array = NULL; 1322 1323 /* 1324 * CB_SEQUENCE 1325 */ 1326 bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4)); 1327 p = svc_slot_alloc(sp); 1328 mutex_enter(&p->se_lock); 1329 cbsap->csa_slotid = p->se_sltno; 1330 cbsap->csa_sequenceid = p->se_seqid; 1331 cbsap->csa_highest_slotid = svc_slot_maxslot(sp); 1332 cbsap->csa_cachethis = FALSE; 1333 1334 /* 1335 * Section 2.10.5.3 (draft 23) 1336 * 1337 * case description refcnt 1338 * ---------------------------------- ------ 1339 * 1) rs state gets created (deleg granted) 1 1340 * slot is reused 0 1341 * 1342 * 2) rs state gets created (deleg granted) 1 1343 * cb_seq, cb_recall 2 1344 * <-- client replies to cb_recall 1 1345 * eventually, slot is reused 0 1346 * 1347 * 3) rs state gets created (deleg granted) 1 1348 * cb_seq, cb_recall 2 1349 * eventually, slot is reused 1 1350 * <-- client replies to cb_recall 0 1351 * 1352 * Cases 2 & 3 are covered here; case 1 covered as 1353 * part of a new request to op_sequence. 1354 */ 1355 if (dsp->rds_rs.refcnt == 0) { 1356 cbsap->csa_rcall_llen = 0; 1357 cbsap->csa_rcall_lval = NULL; 1358 } else { 1359 rfs41_deleg_rs_hold(dsp); 1360 rcl = 1; 1361 rfs41_cb_seq_rcl_args(cbsap, dsp); 1362 } 1363 mutex_exit(&p->se_lock); 1364 1365 /* 1366 * CB_RECALL 1367 */ 1368 bcopy(&dsp->rds_delegid.stateid, &cbrap->stateid, sizeof (stateid4)); 1369 cbrap->truncate = trunc; 1370 fhp = &dsp->rds_finfo->rf_filehandle; 1371 cbrap->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 1372 fhp->nfs_fh4_len, KM_SLEEP); 1373 nfs_fh4_copy(fhp, &cbrap->fh); 1374 1375 /* 1376 * Set up the timeout for the callback and make the actual call. 1377 * Timeout will be 80% of the lease period for this server. 1378 */ 1379 dsp->rds_time_recalled = gethrestime_sec(); /* observability */ 1380 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 1381 timeout.tv_usec = 0; 1382 1383 retry: 1384 ch = rfs41_cb_getch(sp); 1385 (void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch); 1386 call_stat = clnt_call(ch, CB_COMPOUND, 1387 xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args, 1388 xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout); 1389 rfs41_cb_freech(sp, ch); 1390 1391 /* 1392 * If the back channel is down, then mark session(s) appropriately 1393 * (SEQ4_STATUS_CB_PATH_DOWN). On NFS4ERR_DELAY, retry the callback 1394 * after a lease period; if that _still_ results in an error, revoke 1395 * the delegation and assert SEQ4_STATUS_RECALLABLE_STATE_REVOKED 1396 * section 10.4.5 (draft-23). As per Section 8.3 (d23), it's up to 1397 * the client to figure out 'which' stateid got revoked. 1398 */ 1399 if (call_stat != RPC_SUCCESS) { 1400 if (!retried) 1401 delay(SEC_TO_TICK(rfs4_lease_time)); 1402 1403 if (rfs41_file_still_delegated(dsp)) { 1404 if (!retried) { 1405 retried = 1; 1406 goto retry; 1407 } 1408 1409 /* 1410 * We want to make sure that the delegation is 1411 * still valid lest we assert a SEQ4 flag that 1412 * will never be turned off. 1413 */ 1414 rfs41_revoke_deleg(dsp); 1415 } 1416 sc = (call_stat == RPC_CANTSEND || call_stat == RPC_CANTRECV); 1417 rfs41_cb_path_down(sp, sc); 1418 goto done; 1419 1420 } else if (cb4_res.status != NFS4_OK) { 1421 switch (cb4_res.status) { 1422 case NFS4ERR_BADHANDLE: 1423 case NFS4ERR_BADXDR: 1424 case NFS4ERR_OP_NOT_IN_SESSION: 1425 case NFS4ERR_REQ_TOO_BIG: 1426 case NFS4ERR_TOO_MANY_OPS: 1427 /* What do we do when it's our own fault ? */ 1428 break; 1429 1430 /* XXX - rick: NFS4ERR_BAD_STATEID should also retry */ 1431 /* case NFS4ERR_BAD_STATEID: */ 1432 case NFS4ERR_DELAY: 1433 if (!retried) 1434 delay(SEC_TO_TICK(rfs4_lease_time)); 1435 1436 if (!rfs41_file_still_delegated(dsp)) 1437 break; 1438 1439 if (!retried) { 1440 retried = 1; 1441 goto retry; 1442 } 1443 /* FALLTHROUGH */ 1444 1445 case NFS4ERR_BAD_STATEID: /* XXX see above */ 1446 default: 1447 if (rfs41_file_still_delegated(dsp)) 1448 rfs41_revoke_deleg(dsp); 1449 break; 1450 } 1451 } 1452 svc_slot_cb_seqid(&cb4_res, p); 1453 done: 1454 if (rcl) 1455 rfs41_deleg_rs_rele(dsp); 1456 svc_slot_free(sp, p); 1457 1458 rfs4freeargres(&cb4_args, &cb4_res); 1459 rfs41_session_rele(sp); 1460 } 1461 1462 struct recall_arg { 1463 rfs4_deleg_state_t *dsp; 1464 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 1465 bool_t trunc; 1466 }; 1467 1468 static void 1469 do_recall(struct recall_arg *arg) 1470 { 1471 rfs4_deleg_state_t *dsp = arg->dsp; 1472 rfs4_file_t *fp = dsp->rds_finfo; 1473 callb_cpr_t cpr_info; 1474 kmutex_t cpr_lock; 1475 1476 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1477 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 1478 1479 /* 1480 * It is possible that before this thread starts 1481 * the client has send us a return_delegation, and 1482 * if that is the case we do not need to send the 1483 * recall callback. 1484 */ 1485 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 1486 DTRACE_PROBE3(nfss__i__recall, 1487 struct recall_arg *, arg, 1488 struct rfs4_deleg_state_t *, dsp, 1489 struct rfs4_file_t *, fp); 1490 1491 if (arg->recall) 1492 (void) (*arg->recall)(dsp, arg->trunc); 1493 } 1494 1495 mutex_enter(fp->rf_dinfo->rd_recall_lock); 1496 /* 1497 * Recall count may go negative if the parent thread that is 1498 * creating the individual callback threads does not modify 1499 * the recall_count field before the callback thread actually 1500 * gets a response from the CB_RECALL 1501 */ 1502 fp->rf_dinfo->rd_recall_count--; 1503 if (fp->rf_dinfo->rd_recall_count == 0) 1504 cv_signal(fp->rf_dinfo->rd_recall_cv); 1505 mutex_exit(fp->rf_dinfo->rd_recall_lock); 1506 1507 mutex_enter(&cpr_lock); 1508 CALLB_CPR_EXIT(&cpr_info); 1509 mutex_destroy(&cpr_lock); 1510 1511 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 1512 1513 kmem_free(arg, sizeof (struct recall_arg)); 1514 } 1515 1516 struct master_recall_args { 1517 rfs4_file_t *fp; 1518 void (*recall)(rfs4_deleg_state_t *, bool_t); 1519 bool_t trunc; 1520 }; 1521 1522 static void 1523 do_recall_file(struct master_recall_args *map) 1524 { 1525 rfs4_file_t *fp = map->fp; 1526 rfs4_deleg_state_t *dsp; 1527 struct recall_arg *arg; 1528 callb_cpr_t cpr_info; 1529 kmutex_t cpr_lock; 1530 int32_t recall_count; 1531 nfs_server_instance_t *instp; 1532 1533 rfs4_dbe_lock(fp->rf_dbe); 1534 1535 /* Recall already in progress ? */ 1536 mutex_enter(fp->rf_dinfo->rd_recall_lock); 1537 if (fp->rf_dinfo->rd_recall_count != 0) { 1538 mutex_exit(fp->rf_dinfo->rd_recall_lock); 1539 rfs4_dbe_rele_nolock(fp->rf_dbe); 1540 rfs4_dbe_unlock(fp->rf_dbe); 1541 kmem_free(map, sizeof (struct master_recall_args)); 1542 return; 1543 } 1544 1545 mutex_exit(fp->rf_dinfo->rd_recall_lock); 1546 1547 instp = dbe_to_instp(fp->rf_dbe); 1548 1549 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1550 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 1551 1552 /* 1553 * iterate over the file delegation list and 1554 * recall.. 1555 */ 1556 recall_count = 0; 1557 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1558 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1559 1560 rfs4_dbe_lock(dsp->rds_dbe); 1561 /* 1562 * if this delegation state 1563 * is being reaped skip it 1564 */ 1565 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 1566 rfs4_dbe_unlock(dsp->rds_dbe); 1567 continue; 1568 } 1569 1570 /* hold for receiving thread */ 1571 rfs4_dbe_hold(dsp->rds_dbe); 1572 rfs4_dbe_unlock(dsp->rds_dbe); 1573 1574 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1575 arg->recall = instp->deleg_cbrecall; 1576 arg->trunc = map->trunc; 1577 arg->dsp = dsp; 1578 1579 recall_count++; 1580 1581 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1582 minclsyspri); 1583 } 1584 1585 rfs4_dbe_unlock(fp->rf_dbe); 1586 1587 mutex_enter(fp->rf_dinfo->rd_recall_lock); 1588 /* 1589 * Recall count may go negative if the parent thread that is 1590 * creating the individual callback threads does not modify 1591 * the recall_count field before the callback thread actually 1592 * gets a response from the CB_RECALL 1593 */ 1594 fp->rf_dinfo->rd_recall_count += recall_count; 1595 while (fp->rf_dinfo->rd_recall_count) 1596 cv_wait(fp->rf_dinfo->rd_recall_cv, 1597 fp->rf_dinfo->rd_recall_lock); 1598 1599 mutex_exit(fp->rf_dinfo->rd_recall_lock); 1600 1601 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1602 rfs4_file_rele(fp); 1603 kmem_free(map, sizeof (struct master_recall_args)); 1604 mutex_enter(&cpr_lock); 1605 CALLB_CPR_EXIT(&cpr_info); 1606 mutex_destroy(&cpr_lock); 1607 } 1608 1609 static void 1610 rfs4_recall_file(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1611 { 1612 struct master_recall_args *args; 1613 1614 rfs4_dbe_lock(fp->rf_dbe); 1615 if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) { 1616 rfs4_dbe_unlock(fp->rf_dbe); 1617 return; 1618 } 1619 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1620 1621 /* 1622 * Mark the time we started the recall processing. 1623 * If it has been previously recalled, do not reset the 1624 * timer since this is used for the revocation decision. 1625 */ 1626 if (fp->rf_dinfo->rd_time_recalled == 0) 1627 fp->rf_dinfo->rd_time_recalled = gethrestime_sec(); 1628 fp->rf_dinfo->rd_ever_recalled = TRUE; /* used for policy decision */ 1629 /* Client causing recall not always available */ 1630 if (cp) 1631 fp->rf_dinfo->rd_conflicted_client = cp->rc_clientid; 1632 1633 rfs4_dbe_unlock(fp->rf_dbe); 1634 1635 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1636 args->fp = fp; 1637 args->recall = NULL; 1638 args->trunc = trunc; 1639 1640 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1641 minclsyspri); 1642 } 1643 1644 void 1645 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1646 { 1647 time_t elapsed1, elapsed2; 1648 time_t lease; 1649 1650 lease = dbe_to_instp(fp->rf_dbe)->lease_period; 1651 1652 if (fp->rf_dinfo->rd_time_recalled != 0) { 1653 elapsed1 = gethrestime_sec() - fp->rf_dinfo->rd_time_recalled; 1654 elapsed2 = gethrestime_sec() - fp->rf_dinfo->rd_time_lastwrite; 1655 1656 /* First check to see if a revocation should occur */ 1657 if (elapsed1 > lease && elapsed2 > lease) { 1658 rfs4_revoke_file(fp); 1659 return; 1660 } 1661 /* 1662 * Next check to see if a recall should be done again 1663 * so quickly. 1664 */ 1665 if (elapsed1 <= ((lease * 20) / 100)) 1666 return; 1667 } 1668 rfs4_recall_file(fp, trunc, cp); 1669 } 1670 1671 /* 1672 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1673 * open conflicts with the delegation. 1674 * Return true if we need recall otherwise false. 1675 * Assumes entry locks for sp and sp->rs_finfo are held. 1676 */ 1677 bool_t 1678 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1679 { 1680 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo->rd_dtype; 1681 1682 switch (dtype) { 1683 case OPEN_DELEGATE_NONE: 1684 /* Not currently delegated so there is nothing to do */ 1685 return (FALSE); 1686 case OPEN_DELEGATE_READ: 1687 /* 1688 * If the access is only asking for READ then there is 1689 * no conflict and nothing to do. If it is asking 1690 * for write, then there will be conflict and the read 1691 * delegation should be recalled. 1692 */ 1693 if (access == OPEN4_SHARE_ACCESS_READ) 1694 return (FALSE); 1695 else 1696 return (TRUE); 1697 case OPEN_DELEGATE_WRITE: 1698 /* Check to see if this client has the delegation */ 1699 return (rfs4_is_deleg(sp)); 1700 } 1701 1702 return (FALSE); 1703 } 1704 1705 /* 1706 * Return the "best" allowable delegation available given the current 1707 * delegation type and the desired access and deny modes on the file. 1708 * At the point that this routine is called we know that the access and 1709 * deny modes are consistent with the file modes. 1710 */ 1711 static open_delegation_type4 1712 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1713 { 1714 open_delegation_type4 dtype = fp->rf_dinfo->rd_dtype; 1715 uint32_t access = sp->rs_share_access; 1716 uint32_t deny = sp->rs_share_deny; 1717 int readcnt = 0; 1718 int writecnt = 0; 1719 1720 switch (dtype) { 1721 case OPEN_DELEGATE_NONE: 1722 /* 1723 * Determine if more than just this OPEN have the file 1724 * open and if so, no delegation may be provided to 1725 * the client. 1726 */ 1727 if (access & OPEN4_SHARE_ACCESS_WRITE) 1728 writecnt++; 1729 if (access & OPEN4_SHARE_ACCESS_READ) 1730 readcnt++; 1731 1732 if (fp->rf_access_read > readcnt || 1733 fp->rf_access_write > writecnt) 1734 return (OPEN_DELEGATE_NONE); 1735 1736 /* 1737 * If the client is going to write, or if the client 1738 * has exclusive access, return a write delegation. 1739 */ 1740 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1741 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1742 return (OPEN_DELEGATE_WRITE); 1743 /* 1744 * If we don't want to write or we've haven't denied read 1745 * access to others, return a read delegation. 1746 */ 1747 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1748 (deny & ~OPEN4_SHARE_DENY_READ)) 1749 return (OPEN_DELEGATE_READ); 1750 1751 /* Shouldn't get here */ 1752 return (OPEN_DELEGATE_NONE); 1753 1754 case OPEN_DELEGATE_READ: 1755 /* 1756 * If the file is delegated for read but we wan't to 1757 * write or deny others to read then we can't delegate 1758 * the file. We shouldn't get here since the delegation should 1759 * have been recalled already. 1760 */ 1761 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1762 (deny & OPEN4_SHARE_DENY_READ)) 1763 return (OPEN_DELEGATE_NONE); 1764 return (OPEN_DELEGATE_READ); 1765 1766 case OPEN_DELEGATE_WRITE: 1767 return (OPEN_DELEGATE_WRITE); 1768 } 1769 1770 /* Shouldn't get here */ 1771 return (OPEN_DELEGATE_NONE); 1772 } 1773 1774 /* 1775 * Given the desired delegation type and the "history" of the file 1776 * determine the actual delegation type to return. 1777 */ 1778 static open_delegation_type4 1779 rfs4_delegation_policy(nfs_server_instance_t *instp, 1780 open_delegation_type4 dtype, 1781 rfs4_dinfo_t *dinfo, clientid4 cid) 1782 { 1783 time_t elapsed; 1784 1785 if (instp->deleg_policy != SRV_NORMAL_DELEGATE) 1786 return (OPEN_DELEGATE_NONE); 1787 1788 /* 1789 * Has this file/delegation ever been recalled? If not then 1790 * no further checks for a delegation race need to be done. 1791 * However if a recall has occurred, then check to see if a 1792 * client has caused its own delegation recall to occur. If 1793 * not, then has a delegation for this file been returned 1794 * recently? If so, then do not assign a new delegation to 1795 * avoid a "delegation race" between the original client and 1796 * the new/conflicting client. 1797 */ 1798 if (dinfo->rd_ever_recalled == TRUE) { 1799 if (dinfo->rd_conflicted_client != cid) { 1800 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1801 if (elapsed < instp->lease_period) 1802 return (OPEN_DELEGATE_NONE); 1803 } 1804 } 1805 1806 /* Limit the number of read grants */ 1807 if (dtype == OPEN_DELEGATE_READ && 1808 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1809 return (OPEN_DELEGATE_NONE); 1810 1811 /* 1812 * Should consider limiting total number of read/write 1813 * delegations the server will permit. 1814 */ 1815 1816 return (dtype); 1817 } 1818 1819 /* 1820 * Try and grant a delegation for an open give the state. The routine 1821 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1822 * 1823 * The state and associate file entry must be locked 1824 */ 1825 rfs4_deleg_state_t * 1826 rfs4_grant_delegation(struct compound_state *cs, 1827 delegreq_t dreq, rfs4_state_t *sp, int *recall) 1828 { 1829 rfs4_file_t *fp = sp->rs_finfo; 1830 open_delegation_type4 dtype; 1831 int no_delegation; 1832 1833 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1834 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1835 1836 /* Is the server even providing delegations? */ 1837 if (cs->instp->deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1838 return (NULL); 1839 1840 /* Check to see if delegations have been temporarily disabled */ 1841 mutex_enter(&cs->instp->deleg_lock); 1842 no_delegation = cs->instp->deleg_disabled; 1843 mutex_exit(&cs->instp->deleg_lock); 1844 1845 if (no_delegation) 1846 return (NULL); 1847 1848 /* Don't grant a delegation if a deletion is impending. */ 1849 if (fp->rf_dinfo->rd_hold_grant > 0) { 1850 return (NULL); 1851 } 1852 1853 /* 1854 * Don't grant a delegation if there are any lock manager 1855 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1856 * if there are only read locks we should be able to grant a 1857 * read-only delegation), but it's good enough for now. 1858 * 1859 * MT safety: the lock manager checks for conflicting delegations 1860 * before processing a lock request. That check will block until 1861 * we are done here. So if the lock manager acquires a lock after 1862 * we decide to grant the delegation, the delegation will get 1863 * immediately recalled (if there's a conflict), so we're safe. 1864 */ 1865 if (lm_vp_active(fp->rf_vp)) { 1866 return (NULL); 1867 } 1868 1869 /* 1870 * Based on the type of delegation request passed in, take the 1871 * appropriate action (DELEG_NONE is handled above) 1872 */ 1873 switch (dreq) { 1874 1875 case DELEG_READ: 1876 case DELEG_WRITE: 1877 /* 1878 * The server "must" grant the delegation in this case. 1879 * Client is using open previous 1880 */ 1881 dtype = (open_delegation_type4)dreq; 1882 *recall = 1; 1883 break; 1884 case DELEG_ANY: 1885 /* 1886 * If a valid callback path does not exist, no delegation may 1887 * be granted. 1888 */ 1889 if ((*cs->instp->deleg_cbcheck)(sp) != CB_OK) 1890 return (NULL); 1891 1892 /* 1893 * If the original operation which caused time_rm_delayed 1894 * to be set hasn't been retried and completed for one 1895 * full lease period, clear it and allow delegations to 1896 * get granted again. 1897 */ 1898 if (fp->rf_dinfo->rd_time_rm_delayed > 0 && 1899 gethrestime_sec() > 1900 fp->rf_dinfo->rd_time_rm_delayed + cs->instp->lease_period) 1901 fp->rf_dinfo->rd_time_rm_delayed = 0; 1902 1903 /* 1904 * If we are waiting for a delegation to be returned then 1905 * don't delegate this file. We do this for correctness as 1906 * well as if the file is being recalled we would likely 1907 * recall this file again. 1908 */ 1909 1910 if (fp->rf_dinfo->rd_time_recalled != 0 || 1911 fp->rf_dinfo->rd_time_rm_delayed != 0) 1912 return (NULL); 1913 1914 /* Get the "best" delegation candidate */ 1915 dtype = rfs4_check_delegation(sp, fp); 1916 1917 if (dtype == OPEN_DELEGATE_NONE) 1918 return (NULL); 1919 1920 /* 1921 * Based on policy and the history of the file get the 1922 * actual delegation. 1923 */ 1924 dtype = rfs4_delegation_policy(cs->instp, dtype, fp->rf_dinfo, 1925 sp->rs_owner->ro_client->rc_clientid); 1926 1927 if (dtype == OPEN_DELEGATE_NONE) 1928 return (NULL); 1929 break; 1930 default: 1931 return (NULL); 1932 } 1933 1934 /* set the delegation for the state */ 1935 return (rfs4_deleg_state(cs, sp, dtype, recall)); 1936 } 1937 1938 void 1939 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1940 nfsace4 *ace, int recall) 1941 { 1942 open_write_delegation4 *wp; 1943 open_read_delegation4 *rp; 1944 nfs_space_limit4 *spl; 1945 nfsace4 nace; 1946 1947 /* 1948 * We need to allocate a new copy of the who string. 1949 * this string will be freed by the rfs4_op_open dis_resfree 1950 * routine. We need to do this allocation since replays will 1951 * be allocated and rfs4_compound can't tell the difference from 1952 * a replay and an inital open. N.B. if an ace is passed in, it 1953 * the caller's responsibility to free it. 1954 */ 1955 1956 if (ace == NULL) { 1957 /* 1958 * Default is to deny all access, the client will have 1959 * to contact the server. XXX Do we want to actually 1960 * set a deny for every one, or do we simply want to 1961 * construct an entity that will match no one? 1962 */ 1963 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1964 nace.flag = 0; 1965 nace.access_mask = ACE4_VALID_MASK_BITS; 1966 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1967 } else { 1968 nace.type = ace->type; 1969 nace.flag = ace->flag; 1970 nace.access_mask = ace->access_mask; 1971 (void) utf8_copy(&ace->who, &nace.who); 1972 } 1973 1974 dp->delegation_type = dsp->rds_dtype; 1975 1976 switch (dsp->rds_dtype) { 1977 case OPEN_DELEGATE_NONE: 1978 break; 1979 case OPEN_DELEGATE_READ: 1980 rp = &dp->open_delegation4_u.read; 1981 rp->stateid = dsp->rds_delegid.stateid; 1982 rp->recall = (bool_t)recall; 1983 rp->permissions = nace; 1984 break; 1985 case OPEN_DELEGATE_WRITE: 1986 wp = &dp->open_delegation4_u.write; 1987 wp->stateid = dsp->rds_delegid.stateid; 1988 wp->recall = (bool_t)recall; 1989 spl = &wp->space_limit; 1990 spl->limitby = NFS_LIMIT_SIZE; 1991 spl->nfs_space_limit4_u.filesize = 0; 1992 wp->permissions = nace; 1993 break; 1994 } 1995 } 1996 1997 /* 1998 * Check if the file is delegated via the provided file struct. 1999 * Return TRUE if it is delegated. This is intended for use by 2000 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 2001 * 2002 * Note that if the file is found to have a delegation, it is 2003 * recalled, unless the clientid of the caller matches the clientid of the 2004 * delegation. If the caller has specified, there is a slight delay 2005 * inserted in the hopes that the delegation will be returned quickly. 2006 */ 2007 bool_t 2008 rfs4_check_delegated_byfp(nfs_server_instance_t *instp, 2009 int mode, rfs4_file_t *fp, bool_t trunc, bool_t do_delay, 2010 bool_t is_rm, clientid4 *cp) 2011 { 2012 rfs4_deleg_state_t *dsp; 2013 2014 /* Is delegation enabled? */ 2015 if (instp->deleg_policy == SRV_NEVER_DELEGATE) 2016 return (FALSE); 2017 2018 /* do we have a delegation on this file? */ 2019 rfs4_dbe_lock(fp->rf_dbe); 2020 if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) { 2021 if (is_rm) 2022 fp->rf_dinfo->rd_hold_grant++; 2023 rfs4_dbe_unlock(fp->rf_dbe); 2024 return (FALSE); 2025 } 2026 /* 2027 * do we have a write delegation on this file or are we 2028 * requesting write access to a file with any type of existing 2029 * delegation? 2030 */ 2031 if (mode == FWRITE || fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) { 2032 if (cp != NULL) { 2033 dsp = list_head(&fp->rf_delegstatelist); 2034 if (dsp == NULL) { 2035 rfs4_dbe_unlock(fp->rf_dbe); 2036 return (FALSE); 2037 } 2038 /* 2039 * Does the requestor already own the delegation? 2040 */ 2041 if (dsp->rds_client->rc_clientid == *(cp)) { 2042 rfs4_dbe_unlock(fp->rf_dbe); 2043 return (FALSE); 2044 } 2045 } 2046 2047 rfs4_dbe_unlock(fp->rf_dbe); 2048 rfs4_recall_deleg(fp, trunc, NULL); 2049 2050 if (!do_delay) { 2051 rfs4_dbe_lock(fp->rf_dbe); 2052 fp->rf_dinfo->rd_time_rm_delayed = gethrestime_sec(); 2053 rfs4_dbe_unlock(fp->rf_dbe); 2054 return (TRUE); 2055 } 2056 2057 delay(NFS4_DELEGATION_CONFLICT_DELAY); 2058 2059 rfs4_dbe_lock(fp->rf_dbe); 2060 if (fp->rf_dinfo->rd_dtype != OPEN_DELEGATE_NONE) { 2061 fp->rf_dinfo->rd_time_rm_delayed = gethrestime_sec(); 2062 rfs4_dbe_unlock(fp->rf_dbe); 2063 return (TRUE); 2064 } 2065 } 2066 if (is_rm) 2067 fp->rf_dinfo->rd_hold_grant++; 2068 rfs4_dbe_unlock(fp->rf_dbe); 2069 return (FALSE); 2070 } 2071 2072 2073 /* 2074 * Check if the file is delegated in the case of a v2 or v3 access. 2075 * Return TRUE if it is delegated which in turn means that v2 should 2076 * drop the request and in the case of v3 JUKEBOX should be returned. 2077 */ 2078 bool_t 2079 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc, bool_t do_delay, 2080 bool_t is_rm, void *vcp) 2081 { 2082 int delegd = 0; 2083 bool_t create = FALSE; 2084 rfs4_file_t *fp; 2085 clientid4 *cp = (clientid4 *)vcp; 2086 nfs_server_instance_t *nsip; 2087 2088 /* iterrate through the instances */ 2089 rw_enter(&nsi_lock, RW_READER); 2090 for (nsip = list_head(&nsi_head); 2091 nsip != NULL && !delegd; 2092 nsip = list_next(&nsi_head, &nsip->nsi_list)) { 2093 2094 mutex_enter(&nsip->state_lock); 2095 2096 if ((nsip->inst_flags & NFS_INST_STORE_INIT) && 2097 (nsip->deleg_policy != SRV_NEVER_DELEGATE)) { 2098 2099 fp = rfs4_findfile(nsip, vp, NULL, &create); 2100 if (fp != NULL) { 2101 if (rfs4_check_delegated_byfp(nsip, mode, fp, 2102 trunc, do_delay, is_rm, cp)) 2103 delegd++; 2104 rfs4_file_rele(fp); 2105 } 2106 } 2107 mutex_exit(&nsip->state_lock); 2108 } 2109 rw_exit(&nsi_lock); 2110 return (delegd ? TRUE : FALSE); 2111 } 2112 2113 /* 2114 * Release a hold on the hold_grant counter which 2115 * prevents delegation from being granted while a remove 2116 * or a rename is in progress. 2117 */ 2118 void 2119 rfs4_clear_dont_grant(nfs_server_instance_t *instp, 2120 rfs4_file_t *fp) 2121 { 2122 if (instp->deleg_policy == SRV_NEVER_DELEGATE) 2123 return; 2124 rfs4_dbe_lock(fp->rf_dbe); 2125 ASSERT(fp->rf_dinfo->rd_hold_grant > 0); 2126 fp->rf_dinfo->rd_hold_grant--; 2127 fp->rf_dinfo->rd_time_rm_delayed = 0; 2128 rfs4_dbe_unlock(fp->rf_dbe); 2129 } 2130 2131 /* 2132 * State support for delegation. 2133 * Set the state delegation type for this state; 2134 * This routine is called from open via rfs4_grant_delegation and the entry 2135 * locks on sp and sp->rs_finfo are assumed. 2136 */ 2137 static rfs4_deleg_state_t * 2138 rfs4_deleg_state(struct compound_state *cs, 2139 rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 2140 { 2141 rfs4_file_t *fp = sp->rs_finfo; 2142 bool_t create = TRUE; 2143 rfs4_deleg_state_t *dsp; 2144 vnode_t *vp; 2145 int open_prev = *recall; 2146 int ret; 2147 int fflags = 0; 2148 2149 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 2150 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 2151 2152 /* Shouldn't happen */ 2153 if (fp->rf_dinfo->rd_recall_count != 0 || 2154 (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_READ && 2155 dtype != OPEN_DELEGATE_READ)) { 2156 return (NULL); 2157 } 2158 2159 /* Unlock to avoid deadlock */ 2160 rfs4_dbe_unlock(fp->rf_dbe); 2161 rfs4_dbe_unlock(sp->rs_dbe); 2162 2163 dsp = rfs4_finddeleg(cs, sp, &create); 2164 2165 rfs4_dbe_lock(sp->rs_dbe); 2166 rfs4_dbe_lock(fp->rf_dbe); 2167 2168 if (dsp == NULL) 2169 return (NULL); 2170 2171 /* 2172 * It is possible that since we dropped the lock 2173 * in order to call finddeleg, the rfs4_file_t 2174 * was marked such that we should not grant a 2175 * delegation, if so bail out. 2176 */ 2177 if (fp->rf_dinfo->rd_hold_grant > 0) { 2178 rfs4_deleg_state_rele(dsp); 2179 return (NULL); 2180 } 2181 2182 if (create == FALSE) { 2183 if (sp->rs_owner->ro_client == dsp->rds_client && 2184 dsp->rds_dtype == dtype) { 2185 return (dsp); 2186 } else { 2187 rfs4_deleg_state_rele(dsp); 2188 return (NULL); 2189 } 2190 } 2191 2192 /* 2193 * Check that this file has not been delegated to another 2194 * client 2195 */ 2196 if (fp->rf_dinfo->rd_recall_count != 0 || 2197 fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE || 2198 (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_READ && 2199 dtype != OPEN_DELEGATE_READ)) { 2200 rfs4_deleg_state_rele(dsp); 2201 return (NULL); 2202 } 2203 2204 vp = fp->rf_vp; 2205 /* vnevent_support returns 0 if file system supports vnevents */ 2206 if (vnevent_support(vp, NULL)) { 2207 rfs4_deleg_state_rele(dsp); 2208 return (NULL); 2209 } 2210 2211 /* Calculate the fflags for this OPEN. */ 2212 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 2213 fflags |= FREAD; 2214 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 2215 fflags |= FWRITE; 2216 2217 *recall = 0; 2218 /* 2219 * Before granting a delegation we need to know if anyone else has 2220 * opened the file in a conflicting mode. However, first we need to 2221 * know how we opened the file to check the counts properly. 2222 */ 2223 if (dtype == OPEN_DELEGATE_READ) { 2224 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 2225 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 2226 vn_is_mapped(vp, V_WRITE)) { 2227 if (open_prev) { 2228 *recall = 1; 2229 } else { 2230 rfs4_deleg_state_rele(dsp); 2231 return (NULL); 2232 } 2233 } 2234 ret = fem_install(vp, cs->instp->deleg_rdops, (void *)fp, 2235 OPARGUNIQ, rfs4_mon_hold, rfs4_mon_rele); 2236 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 2237 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 2238 vn_is_mapped(vp, V_WRITE)) { 2239 if (open_prev) { 2240 *recall = 1; 2241 } else { 2242 (void) fem_uninstall(vp, cs->instp->deleg_rdops, 2243 (void *)fp); 2244 rfs4_deleg_state_rele(dsp); 2245 return (NULL); 2246 } 2247 } 2248 /* 2249 * Because a client can hold onto a delegation after the 2250 * file has been closed, we need to keep track of the 2251 * access to this file. Otherwise the CIFS server would 2252 * not know about the client accessing the file and could 2253 * inappropriately grant an OPLOCK. 2254 * fem_install() returns EBUSY when asked to install a 2255 * OPARGUNIQ monitor more than once. Therefore, check the 2256 * return code because we only want this done once. 2257 */ 2258 if (ret == 0) 2259 vn_open_upgrade(vp, FREAD); 2260 } else { /* WRITE */ 2261 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 2262 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 2263 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 2264 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 2265 vn_is_mapped(vp, V_RDORWR)) { 2266 if (open_prev) { 2267 *recall = 1; 2268 } else { 2269 rfs4_deleg_state_rele(dsp); 2270 return (NULL); 2271 } 2272 } 2273 ret = fem_install(vp, cs->instp->deleg_wrops, (void *)fp, 2274 OPARGUNIQ, rfs4_mon_hold, rfs4_mon_rele); 2275 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 2276 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 2277 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 2278 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 2279 vn_is_mapped(vp, V_RDORWR)) { 2280 if (open_prev) { 2281 *recall = 1; 2282 } else { 2283 (void) fem_uninstall(vp, cs->instp->deleg_wrops, 2284 (void *)fp); 2285 rfs4_deleg_state_rele(dsp); 2286 return (NULL); 2287 } 2288 } 2289 /* 2290 * Because a client can hold onto a delegation after the 2291 * file has been closed, we need to keep track of the 2292 * access to this file. Otherwise the CIFS server would 2293 * not know about the client accessing the file and could 2294 * inappropriately grant an OPLOCK. 2295 * fem_install() returns EBUSY when asked to install a 2296 * OPUNIQ monitor more than once. Therefore, check the 2297 * return code because we only want this done once. 2298 */ 2299 if (ret == 0) 2300 vn_open_upgrade(vp, FREAD|FWRITE); 2301 } 2302 2303 /* 2304 * Place on delegation list for file 2305 */ 2306 ASSERT(!list_link_active(&dsp->rds_node)); 2307 list_insert_tail(&fp->rf_delegstatelist, dsp); 2308 2309 dsp->rds_dtype = fp->rf_dinfo->rd_dtype = dtype; 2310 2311 /* Update delegation stats for this file */ 2312 fp->rf_dinfo->rd_time_lastgrant = gethrestime_sec(); 2313 2314 /* reset since this is a new delegation */ 2315 fp->rf_dinfo->rd_conflicted_client = 0; 2316 fp->rf_dinfo->rd_ever_recalled = FALSE; 2317 2318 if (dtype == OPEN_DELEGATE_READ) 2319 fp->rf_dinfo->rd_rdgrants++; 2320 else 2321 fp->rf_dinfo->rd_wrgrants++; 2322 2323 return (dsp); 2324 } 2325 2326 /* 2327 * State routine for the server when a delegation is returned. 2328 */ 2329 void 2330 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 2331 { 2332 rfs4_file_t *fp = dsp->rds_finfo; 2333 open_delegation_type4 dtypewas; 2334 nfs_server_instance_t *instp; 2335 2336 rfs4_dbe_lock(fp->rf_dbe); 2337 2338 /* nothing to do if no longer on list */ 2339 if (!list_link_active(&dsp->rds_node)) { 2340 rfs4_dbe_unlock(fp->rf_dbe); 2341 return; 2342 } 2343 2344 /* Remove state from recall list */ 2345 list_remove(&fp->rf_delegstatelist, dsp); 2346 2347 instp = dbe_to_instp(fp->rf_dbe); 2348 if (instp->inst_flags & NFS_INST_v41) { 2349 mds_session_t *sp; 2350 slotid4 slot; 2351 slot_ent_t *slp; 2352 extern void rfs41_rs_erase(void *); 2353 2354 if (dsp->rds_rs.refcnt > 0) { 2355 /* 2356 * refcnt > 0, so this means we still have an active 2357 * hold on deleg_state. If (for some reason) we don't 2358 * find the sp, the worse that'll happen is that we'll 2359 * leak some state (ie. won't be able to clean up the 2360 * hold). But nothing to get too excited about. 2361 */ 2362 slot = dsp->rds_rs.slotno; 2363 sp = mds_findsession_by_id(instp, dsp->rds_rs.sessid); 2364 if (sp != NULL) { 2365 rfs4_dbe_lock(sp->sn_dbe); 2366 ASSERT(sp->sn_replay != NULL); 2367 slp = slrc_slot_get(sp->sn_replay, slot); 2368 if (slp->se_p == dsp) { 2369 rfs41_rs_erase(dsp); 2370 slp->se_p = NULL; 2371 } 2372 rfs4_dbe_unlock(sp->sn_dbe); 2373 rfs41_session_rele(sp); 2374 } 2375 } 2376 } 2377 2378 /* 2379 * If no more delegations then remove the FEM 2380 * monitors 2381 */ 2382 if (list_is_empty(&fp->rf_delegstatelist)) { 2383 dtypewas = fp->rf_dinfo->rd_dtype; 2384 fp->rf_dinfo->rd_dtype = OPEN_DELEGATE_NONE; 2385 rfs4_dbe_cv_broadcast(fp->rf_dbe); 2386 2387 /* if file system was unshared, the vp will be NULL */ 2388 if (fp->rf_vp != NULL) { 2389 /* 2390 * Once a delegation is no longer held by any client, 2391 * the monitor is uninstalled. At this point, the 2392 * client must send OPEN otw, so we don't need the 2393 * reference on the vnode anymore. The open 2394 * downgrade removes the reference put on earlier. 2395 */ 2396 if (dtypewas == OPEN_DELEGATE_READ) { 2397 (void) fem_uninstall(fp->rf_vp, 2398 instp->deleg_rdops, (void *)fp); 2399 vn_open_downgrade(fp->rf_vp, FREAD); 2400 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 2401 (void) fem_uninstall(fp->rf_vp, 2402 instp->deleg_wrops, (void *)fp); 2403 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 2404 } 2405 } 2406 } 2407 2408 switch (dsp->rds_dtype) { 2409 case OPEN_DELEGATE_READ: 2410 fp->rf_dinfo->rd_rdgrants--; 2411 break; 2412 case OPEN_DELEGATE_WRITE: 2413 fp->rf_dinfo->rd_wrgrants--; 2414 break; 2415 default: 2416 break; 2417 } 2418 2419 /* used in the policy decision */ 2420 fp->rf_dinfo->rd_time_returned = gethrestime_sec(); 2421 2422 /* 2423 * reset the time_recalled field so future delegations are not 2424 * accidentally revoked 2425 */ 2426 if ((fp->rf_dinfo->rd_rdgrants + fp->rf_dinfo->rd_wrgrants) == 0) 2427 fp->rf_dinfo->rd_time_recalled = 0; 2428 2429 rfs4_dbe_unlock(fp->rf_dbe); 2430 2431 rfs4_dbe_lock(dsp->rds_dbe); 2432 2433 dsp->rds_dtype = OPEN_DELEGATE_NONE; 2434 2435 if (revoked == TRUE) 2436 dsp->rds_time_revoked = gethrestime_sec(); 2437 2438 rfs4_dbe_invalidate(dsp->rds_dbe); 2439 2440 rfs4_dbe_unlock(dsp->rds_dbe); 2441 2442 if (revoked == TRUE) { 2443 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 2444 dsp->rds_client->rc_deleg_revoked++; /* observability */ 2445 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 2446 } 2447 } 2448 2449 static void 2450 rfs4_revoke_deleg(rfs4_deleg_state_t *dsp) 2451 { 2452 rfs4_return_deleg(dsp, TRUE); 2453 } 2454 2455 static void 2456 rfs41_revoke_deleg(rfs4_deleg_state_t *dsp) 2457 { 2458 cmn_err(CE_NOTE, "rfs41_revoke_deleg: delegation revoked"); 2459 rfs41_seq4_hold(&dsp->rds_client->rc_seq4, 2460 SEQ4_STATUS_RECALLABLE_STATE_REVOKED); 2461 rfs4_revoke_deleg(dsp); 2462 } 2463 2464 static void 2465 rfs4_revoke_file(rfs4_file_t *fp) 2466 { 2467 rfs4_deleg_state_t *dsp; 2468 2469 /* 2470 * The lock for rfs4_file_t must be held when traversing the 2471 * delegation list but that lock needs to be released to call 2472 * rfs4_revoke_deleg(). 2473 * 2474 * The called function rfs4_revoke_deleg removes the entry 2475 * from the fp delegation list, so the while loop will keep 2476 * looping until the list is empty. 2477 */ 2478 rfs4_dbe_lock(fp->rf_dbe); 2479 while (dsp = list_head(&fp->rf_delegstatelist)) { 2480 rfs4_dbe_hold(dsp->rds_dbe); 2481 rfs4_dbe_unlock(fp->rf_dbe); 2482 rfs4_revoke_deleg(dsp); 2483 rfs4_deleg_state_rele(dsp); 2484 rfs4_dbe_lock(fp->rf_dbe); 2485 } 2486 rfs4_dbe_unlock(fp->rf_dbe); 2487 } 2488 2489 /* 2490 * A delegation is assumed to be present on the file associated with 2491 * "sp". Check to see if the delegation matches is associated with 2492 * the same client as referenced by "sp". If it is not, TRUE is 2493 * returned. If the delegation DOES match the client (or no 2494 * delegation is present), return FALSE. 2495 * Assume the state entry and file entry are locked. 2496 * 2497 * This routine only checks the delegations of the calling server instance. 2498 * Since this is only called from rfs4_check_recall(), which is only called 2499 * by rfs4_do_open() and mds_do_open(), they only need to check if they own 2500 * this delegation. All other conflict detection will be done by the monitor 2501 * on OPEN. 2502 */ 2503 bool_t 2504 rfs4_is_deleg(rfs4_state_t *sp) 2505 { 2506 rfs4_deleg_state_t *dsp; 2507 rfs4_file_t *fp = sp->rs_finfo; 2508 rfs4_client_t *cp = sp->rs_owner->ro_client; 2509 2510 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 2511 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 2512 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 2513 if (cp != dsp->rds_client) 2514 return (TRUE); 2515 } 2516 2517 return (FALSE); 2518 } 2519 2520 void 2521 rfs4_disable_delegation(nfs_server_instance_t *instp) 2522 { 2523 mutex_enter(&instp->deleg_lock); 2524 instp->deleg_disabled++; 2525 mutex_exit(&instp->deleg_lock); 2526 } 2527 2528 void 2529 rfs4_enable_delegation(nfs_server_instance_t *instp) 2530 { 2531 mutex_enter(&instp->deleg_lock); 2532 ASSERT(instp->deleg_disabled > 0); 2533 instp->deleg_disabled--; 2534 mutex_exit(&instp->deleg_lock); 2535 } 2536 2537 void 2538 rfs4_mon_hold(void *arg) 2539 { 2540 rfs4_file_t *fp = arg; 2541 2542 rfs4_dbe_hold(fp->rf_dbe); 2543 } 2544 2545 void 2546 rfs4_mon_rele(void *arg) 2547 { 2548 rfs4_file_t *fp = arg; 2549 2550 rfs4_dbe_rele_nolock(fp->rf_dbe); 2551 } 2552