1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <sys/sdt.h> 28 #include <sys/ddi.h> 29 #include <rpc/types.h> 30 #include <sys/cmn_err.h> 31 #include <rpc/auth.h> 32 #include <rpc/auth_unix.h> 33 #include <rpc/auth_des.h> 34 #include <rpc/svc.h> 35 #include <rpc/xdr.h> 36 #include <nfs/nfs4_kprot.h> 37 #include <nfs/nfs_dispatch.h> 38 #include <nfs/nfs4.h> 39 #include <nfs/mds_state.h> 40 #include <nfs/nfssys.h> 41 #include <nfs/ds.h> 42 #include <nfs/spe_impl.h> 43 #include <sys/utsname.h> 44 #include <sys/systeminfo.h> 45 46 extern int inet_pton(int, char *, void *); 47 48 rfs4_client_t *mds_findclient(nfs_client_id4 *, bool_t *, rfs4_client_t *); 49 50 static void nullfree(void); 51 static void ds_reportavail_free(DS_REPORTAVAILres *); 52 static void ds_checkstate_free(DS_CHECKSTATEres *); 53 54 void ds_map_mds_dataset_id(DS_MAP_MDS_DATASET_IDargs *, 55 DS_MAP_MDS_DATASET_IDres *, struct svc_req *); 56 void ds_checkstate(DS_CHECKSTATEargs *, DS_CHECKSTATEres *, struct svc_req *); 57 void ds_renew(DS_RENEWargs *, DS_RENEWres *, struct svc_req *); 58 void ds_reportavail(DS_REPORTAVAILargs *, DS_REPORTAVAILres *, 59 struct svc_req *); 60 void ds_exchange(DS_EXIBIargs *, DS_EXIBIres *, struct svc_req *); 61 void ds_sec_info(DS_SECINFOargs *, DS_SECINFOres *, struct svc_req *); 62 void ds_fmatpt(DS_FMATPTargs *, DS_FMATPTres *, struct svc_req *); 63 void ds_shutdown(DS_SHUTDOWNargs *, DS_SHUTDOWNres *, struct svc_req *); 64 65 void nfs_ds_cp_dispatch(struct svc_req *, SVCXPRT *); 66 67 static enum ds_status get_ds_status(nfsstat4); 68 static void get_access_mode(compound_state_t *, DS_CHECKSTATEres *); 69 ds_owner_t *mds_dsinfo_alloc(DS_EXIBIargs *); 70 71 /* 72 * XXX 73 * This variable is used to select regular NFS server behaviour 74 * (no DSs) vs. the need to use proxy I/O to read/write data 75 * from the DSs. At some point, this needs to be replaced by 76 * a per-export setting that indicates whether data is local 77 * or remote, so that we can handle both pNFS and locally- 78 * provisioned UFS or other data. 79 */ 80 int nfs_ds_present = 0; /* Has a DS checked in yet? */ 81 82 /* 83 * Dispatch structure for the control protocol 84 */ 85 struct nfs_cp_disp { 86 void (*proc)(); 87 xdrproc_t decode_args; 88 xdrproc_t encode_reply; 89 void (*resfree)(); 90 char *name; 91 }; 92 93 union nfs_ds_cp_sarg { 94 DS_EXIBIargs ds_exchange; 95 DS_CHECKSTATEargs ds_checkstate; 96 DS_RENEWargs ds_renew; 97 DS_REPORTAVAILargs ds_reportavail; 98 DS_MAP_MDS_DATASET_IDargs ds_map_mds_dataset_id; 99 DS_SECINFOargs ds_secinfo; 100 DS_FMATPTargs ds_fmatpt; 101 DS_SHUTDOWNargs ds_shutdown; 102 }; 103 104 union nfs_ds_cp_sres { 105 DS_EXIBIres ds_exchange; 106 DS_CHECKSTATEres ds_checkstate; 107 DS_RENEWres ds_renew; 108 DS_REPORTAVAILres ds_reportavail; 109 DS_MAP_MDS_DATASET_IDres ds_map_mds_dataset_id; 110 DS_SECINFOres ds_secinfo; 111 DS_FMATPTres ds_fmatpt; 112 DS_SHUTDOWNres ds_shutdown; 113 }; 114 115 struct nfs_cp_disp nfs_ds_cp_v1[] = { 116 {NULL, NULL, NULL, NULL, NULL}, /* RPC Null */ 117 {ds_checkstate, xdr_DS_CHECKSTATEargs, xdr_DS_CHECKSTATEres, 118 ds_checkstate_free, "DS_Checkstate"}, 119 {ds_exchange, xdr_DS_EXIBIargs, xdr_DS_EXIBIres, 120 nullfree, "DS_EXIBI"}, 121 {ds_fmatpt, xdr_DS_FMATPTargs, xdr_DS_FMATPTres, 122 nullfree, "DS_FmaTpt"}, 123 {ds_map_mds_dataset_id, xdr_DS_MAP_MDS_DATASET_IDargs, 124 xdr_DS_MAP_MDS_DATASET_IDres, nullfree, "DS_MapMdsDatasetId"}, 125 {NULL, NULL, NULL, NULL, "DS_MapMdsSid"}, 126 {ds_renew, xdr_DS_RENEWargs, xdr_DS_RENEWres, nullfree, "DS_Renew"}, 127 {ds_reportavail, xdr_DS_REPORTAVAILargs, xdr_DS_REPORTAVAILres, 128 ds_reportavail_free, "DS_ReportAvail"}, 129 {ds_sec_info, xdr_DS_SECINFOargs, xdr_DS_SECINFOres, 130 nullfree, "DS_SecInfo"}, 131 {ds_shutdown, xdr_DS_SHUTDOWNargs, xdr_DS_SHUTDOWNres, 132 nullfree, "DS_ShutDown"} 133 }; 134 135 static uint_t nfs_ds_cp_cnt = 136 sizeof (nfs_ds_cp_v1) / sizeof (struct nfs_cp_disp); 137 138 #define NFS_CP_ILLEGAL_PROC (nfs_ds_cp_cnt) 139 140 /* 141 * XXX: The layout field of the response is not being filled, and hence 142 * will not be freed here. The for loop will not be entered. 143 */ 144 static void 145 ds_checkstate_free(DS_CHECKSTATEres *resp) 146 { 147 int i; 148 uint_t lo_len; 149 layout4 *lo_val; 150 uint_t loc_len; 151 char *loc_val; 152 ds_filestate *fs; 153 154 fs = &(resp->DS_CHECKSTATEres_u.file_state); 155 if (resp->status == DS_OK && fs != NULL) { 156 lo_len = fs->layout.layout_len; 157 lo_val = fs->layout.layout_val; 158 159 for (i = 0; i < lo_len; i++) { 160 loc_len = lo_val[i].lo_content.loc_body.loc_body_len; 161 loc_val = lo_val[i].lo_content.loc_body.loc_body_val; 162 kmem_free(loc_val, loc_len); 163 kmem_free(&lo_val[i], sizeof (layout4)); 164 } 165 } 166 } 167 168 static void 169 ds_reportavail_free(DS_REPORTAVAILres *resp) 170 { 171 int i, j; 172 173 DS_REPORTAVAILresok *res_ok; 174 mds_sid *sid_array; 175 uint32_t sid_array_len; 176 177 if (resp->status != DS_OK) 178 return; 179 180 res_ok = &(resp->DS_REPORTAVAILres_u.res_ok); 181 182 /* Free the contents of the guid_map array */ 183 for (i = 0; i < res_ok->guid_map.guid_map_len; i++) { 184 sid_array = 185 res_ok->guid_map.guid_map_val[i].mds_sid_array. 186 mds_sid_array_val; 187 sid_array_len = 188 res_ok->guid_map.guid_map_val[i].mds_sid_array. 189 mds_sid_array_len; 190 191 /* Free the contents of the mds_sid_array */ 192 for (j = 0; j < sid_array_len; j++) { 193 /* Free the mds_sid_content */ 194 kmem_free(sid_array[j].val, 195 sid_array[j].len); 196 } 197 198 /* Free the mds_sid */ 199 kmem_free(sid_array, sid_array_len * sizeof (mds_sid)); 200 } 201 202 /* Free the guid_map */ 203 kmem_free(res_ok->guid_map.guid_map_val, 204 res_ok->guid_map.guid_map_len * 205 sizeof (struct ds_guid_map)); 206 } 207 208 static void 209 nullfree(void) 210 { 211 } 212 213 mds_ds_fh * 214 get_mds_ds_fh(nfs_fh4 *otw_fh) 215 { 216 XDR x; 217 mds_ds_fh *fh; 218 219 xdrmem_create(&x, otw_fh->nfs_fh4_val, 220 otw_fh->nfs_fh4_len, XDR_DECODE); 221 222 fh = kmem_zalloc(sizeof (mds_ds_fh), KM_SLEEP); 223 224 if (!xdr_ds_fh_fmt(&x, fh)) { 225 free_mds_ds_fh(fh); 226 return (NULL); 227 } 228 return (fh); 229 } 230 231 vnode_t * 232 ds_fhtovp(mds_ds_fh *fhp, ds_status *statp) 233 { 234 vnode_t *vp = NULL; 235 int error; 236 fsid_t *fs_id = (fsid_t *)fhp->fh.v1.mds_dataset_id.val; 237 fid_t fidp; 238 vfs_t *vfsp; 239 240 vfsp = getvfs(fs_id); 241 if (vfsp == NULL) { 242 *statp = DSERR_BADHANDLE; 243 return (NULL); 244 } 245 246 fidp.fid_len = fhp->fh.v1.mds_fid.len; 247 248 bcopy(fhp->fh.v1.mds_fid.val, 249 fidp.fid_data, fidp.fid_len); 250 251 error = VFS_VGET(vfsp, &vp, &fidp); 252 253 /* release the hold from getvfs() */ 254 VFS_RELE(vfsp); 255 256 if (error != 0) { 257 *statp = DSERR_BADHANDLE; 258 return (NULL); 259 } 260 261 *statp = DS_OK; 262 return (vp); 263 } 264 265 rfs4_file_t * 266 mds_findfile_by_dsfh(nfs_server_instance_t *instp, mds_ds_fh *fhp) 267 { 268 ds_status stat; 269 vnode_t *vp; 270 rfs4_file_t *fp; 271 272 /* map ds_fh to vp */ 273 vp = ds_fhtovp(fhp, &stat); 274 if (vp == NULL) 275 return (NULL); 276 277 mutex_enter(&vp->v_vsd_lock); 278 fp = (rfs4_file_t *)vsd_get(vp, instp->vkey); 279 mutex_exit(&vp->v_vsd_lock); 280 281 if (fp == NULL) 282 return (NULL); 283 284 rfs4_dbe_lock(fp->rf_dbe); 285 if (rfs4_dbe_is_invalid(fp->rf_dbe) || 286 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) { 287 rfs4_dbe_unlock(fp->rf_dbe); 288 return (NULL); 289 } 290 291 rfs4_dbe_hold(fp->rf_dbe); 292 rfs4_dbe_unlock(fp->rf_dbe); 293 return (fp); 294 } 295 296 /* 297 * Convert the NFS error into a control protocol error to be returned with 298 * control protocol response. Converse of get_nfs_status on the data server 299 */ 300 static enum ds_status 301 get_ds_status(nfsstat4 stat) 302 { 303 ds_status status; 304 switch (stat) { 305 case NFS4ERR_INVAL: 306 status = DSERR_INVAL; 307 break; 308 case NFS4ERR_EXPIRED: 309 status = DSERR_EXPIRED; 310 break; 311 case NFS4ERR_STALE_STATEID: 312 status = DSERR_STALE_STATEID; 313 break; 314 case NFS4ERR_OPENMODE: 315 status = DSERR_ACCESS; 316 break; 317 case NFS4ERR_BAD_STATEID: 318 status = DSERR_BAD_STATEID; 319 break; 320 case NFS4ERR_OLD_STATEID: 321 status = DSERR_OLD_STATEID; 322 break; 323 case NFS4ERR_GRACE: 324 status = DSERR_GRACE; 325 break; 326 default: 327 status = DSERR_RESOURCE; 328 break; 329 } 330 331 return (status); 332 } 333 334 /* 335 * Get access mode from rfs4_file_t. 336 */ 337 static void 338 get_access_mode(compound_state_t *cs, DS_CHECKSTATEres *resp) 339 { 340 rfs4_file_t *fp; 341 bool_t create = FALSE; 342 343 fp = rfs4_findfile(cs->instp, cs->vp, NULL, &create); 344 if (fp == NULL) { 345 resp->status = DSERR_BADHANDLE; 346 return; 347 } 348 rfs4_dbe_lock(fp->rf_dbe); 349 resp->DS_CHECKSTATEres_u.file_state.open_mode = fp->rf_share_access; 350 rfs4_dbe_unlock(fp->rf_dbe); 351 rfs4_file_rele(fp); 352 } 353 354 /* ARGSUSED */ 355 void 356 ds_checkstate(DS_CHECKSTATEargs *argp, DS_CHECKSTATEres *resp, 357 struct svc_req *req) 358 { 359 compound_state_t *cs; 360 mds_ds_fh *dfhp = NULL; 361 nfsstat4 stat; 362 nnode_error_t error; 363 bool_t deleg; 364 nnode_t *np; 365 clientid4 clientid; 366 vnode_t *vp; 367 rfs4_file_t *fp; 368 369 bzero(resp, sizeof (*resp)); 370 371 /* 372 * Decode the OTW DS file handle. 373 */ 374 if ((dfhp = get_mds_ds_fh(&argp->fh)) == NULL) { 375 resp->status = DSERR_BADHANDLE; 376 return; 377 } 378 379 /* 380 * Sanity check. Ensure that we are dealing with a DS file handle. 381 */ 382 if (dfhp->type != FH41_TYPE_DMU_DS || dfhp->vers != DS_FH_v1) { 383 free_mds_ds_fh(dfhp); 384 resp->status = DSERR_BADHANDLE; 385 return; 386 } 387 388 /* 389 * Convert the ds file handle to a vnode. vnode is required by 390 * check_stateid. 391 */ 392 vp = ds_fhtovp(dfhp, &resp->status); 393 free_mds_ds_fh(dfhp); 394 395 /* 396 * We steal the reference from VFS_VGET in ds_fhtovp, so do not need to 397 * do VN_HOLD explicity. VN_RELE happens when the compound_state_t gets 398 * back to the kmem_cache via rfs41_compound_state_free. 399 */ 400 if (vp == NULL) { 401 resp->status = DSERR_BADHANDLE; 402 return; 403 } 404 405 /* 406 * We need to invoke the check_stateid through the nnode interface. 407 * Currently we do not have a method for deriving an nnode from a DS 408 * filehandle. Hence, we are using vnodes. 409 */ 410 error = nnode_from_vnode(&np, vp); 411 if (error != 0) { 412 VN_RELE(vp); 413 resp->status = DSERR_BADHANDLE; 414 return; 415 } 416 417 /* 418 * Allocate a compound struct, needed by the function 419 * that gets called via the nnode interface. 420 */ 421 cs = rfs41_compound_state_alloc(mds_server); 422 cs->vp = vp; 423 cs->nn = np; 424 425 /* 426 * Do a checkstate via nnode interface. 427 * XXX: The nnop_check_stateid function will call nso_checkstate which 428 * is mapped to the v4.0 check_stateid. This works for now because the 429 * way the stateids are being generated. However, when the stateids get 430 * generated with proper v4.1 bits, then either a different function 431 * needs to be called, or the check_stateid has to be enhanced to deal 432 * with v4.1 bits as well. 433 */ 434 deleg = FALSE; 435 if ((stat = nnop_check_stateid(np, cs, argp->mode, &argp->stateid, 436 FALSE, &deleg, TRUE, NULL, &clientid)) != NFS4_OK) { 437 resp->status = get_ds_status(stat); 438 rfs41_compound_state_free(cs); 439 return; 440 } 441 442 /* 443 * Copy the clientid that is returned from the check_stateid in the 444 * response. 445 */ 446 resp->DS_CHECKSTATEres_u.file_state.mds_clid = clientid; 447 448 /* 449 * Obtain file access mode, which is returned as part of the response. 450 */ 451 get_access_mode(cs, resp); 452 453 deleg = FALSE; /* ugly reuse */ 454 fp = rfs4_findfile(cs->instp, vp, NULL, &deleg); 455 if (fp != NULL) { 456 /* 457 * If layout has not been written to stable storage, 458 * then do so before issuing the reply. 459 */ 460 if (mds_put_layout(fp->rf_mlo, fp->rf_vp)) { 461 rfs4_file_rele(fp); 462 rfs41_compound_state_free(cs); 463 /* 464 * DSERR_RESOURCE? DSERR_NOSPC? 465 */ 466 resp->status = DSERR_SERVERFAULT; 467 return; 468 } 469 rfs4_file_rele(fp); 470 resp->status = DS_OK; 471 } else { 472 resp->status = DSERR_SERVERFAULT; 473 } 474 475 /* 476 * XXX: Todo List 477 * Validate security flavor. 478 * Authenticate. 479 * Return layout information. 480 */ 481 482 rfs41_compound_state_free(cs); 483 } 484 485 /* 486 * Data Server wants to know pathname at MDS for 487 * specified object. 488 */ 489 490 /* ARGSUSED */ 491 void 492 ds_map_mds_dataset_id(DS_MAP_MDS_DATASET_IDargs *argp, 493 DS_MAP_MDS_DATASET_IDres *resp, struct svc_req *req) 494 { 495 /* we're done! */ 496 resp->status = DSERR_NOTSUPP; 497 } 498 499 ds_owner_t * 500 mds_find_ds_owner_by_id(ds_id ds_id) 501 { 502 ds_owner_t *dop = NULL; 503 bool_t create = FALSE; 504 505 rw_enter(&mds_server->ds_owner_lock, RW_READER); 506 dop = (ds_owner_t *)rfs4_dbsearch(mds_server->ds_owner_idx, 507 (void *)(uintptr_t)ds_id, 508 &create, NULL, RFS4_DBS_VALID); 509 rw_exit(&mds_server->ds_owner_lock); 510 511 return (dop); 512 } 513 514 /* ARGSUSED */ 515 void 516 mds_ds_rebooted(ds_owner_t *dop) 517 { 518 /* 519 * XXX: clean up MDSs' DS state held or something! 520 */ 521 } 522 523 524 /* ARGSUSED */ 525 void 526 ds_renew(DS_RENEWargs *argp, DS_RENEWres *resp, struct svc_req *rqstp) 527 { 528 ds_owner_t *dop; 529 530 /* do some basic sanity checks */ 531 if (argp->ds_id == 0) { 532 resp->status = DSERR_INVAL; 533 return; 534 } 535 536 dop = mds_find_ds_owner_by_id(argp->ds_id); 537 if (dop == NULL) { 538 resp->status = DSERR_STALE_DSID; 539 return; 540 } 541 542 rfs4_dbe_lock(dop->dbe); 543 dop->last_access = gethrestime_sec(); 544 if (dop->verifier != argp->ds_boottime) { 545 dop->dsi_flags |= MDS_DSI_REBOOTED; 546 dop->verifier = argp->ds_boottime; 547 } 548 rfs4_dbe_unlock(dop->dbe); 549 550 /* if needed call mds_ds_rebooted() to do cleanup. */ 551 552 resp->DS_RENEWres_u.mds_boottime = mds_server->Write4verf; 553 resp->status = DS_OK; 554 555 rfs4_dbe_rele(dop->dbe); /* search */ 556 } 557 558 /* ARGSUSED */ 559 void 560 ds_sec_info(DS_SECINFOargs *argp, DS_SECINFOres *resp, struct svc_req *rqstp) 561 { 562 /* 563 * insert server code here 564 */ 565 resp->status = DSERR_NOTSUPP; 566 } 567 568 /* ARGSUSED */ 569 void 570 ds_fmatpt(DS_FMATPTargs *argp, DS_FMATPTres *resp, struct svc_req *rqstp) 571 { 572 /* 573 * insert server code here 574 */ 575 resp->status = DSERR_NOTSUPP; 576 } 577 578 /* ARGSUSED */ 579 void 580 ds_shutdown(DS_SHUTDOWNargs *argp, DS_SHUTDOWNres *resp, struct svc_req *rqstp) 581 { 582 /* 583 * insert server code here 584 */ 585 resp->status = DSERR_NOTSUPP; 586 } 587 588 int 589 ds_get_remote_uaddr(struct svc_req *rp, char *buf, int with_port) 590 { 591 const char *kinet_ntop6(uchar_t *, char *, size_t); 592 593 struct sockaddr *sap; 594 struct sockaddr_in6 *sin6; 595 struct sockaddr_in *sin; 596 uchar_t *b; 597 char udder[INET6_ADDRSTRLEN]; 598 599 ASSERT(rp); 600 601 sap = (struct sockaddr *)svc_getrpccaller(rp->rq_xprt)->buf; 602 603 if (sap == NULL) { 604 return (DS_INVAL); 605 } 606 607 switch (sap->sa_family) { 608 case AF_INET: 609 sin = (struct sockaddr_in *)sap; 610 611 b = (uchar_t *)&sin->sin_addr; 612 if (with_port) 613 (void) sprintf(buf, "%d.%d.%d.%d.%2d.%2d", 614 b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, 615 b[3] & 0xFF, 616 sin->sin_port >> 8, 617 sin->sin_port & 255); 618 else 619 (void) sprintf(buf, "%d.%d.%d.%d", 620 b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, 621 b[3] & 0xFF); 622 623 break; 624 625 case AF_INET6: 626 sin6 = (struct sockaddr_in6 *)sap; 627 628 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, 629 udder, INET6_ADDRSTRLEN); 630 if (with_port) 631 (void) sprintf(buf, "%s.%02d.%02d", udder, 632 sin6->sin6_port >> 8, 633 sin6->sin6_port & 255); 634 else 635 (void) sprintf(buf, "%s", udder); 636 default: 637 return (DS_INVAL); 638 } 639 return (DS_OK); 640 } 641 642 /* ARGSUSED */ 643 ds_guid_info_t * 644 ds_guid_info_add(ds_owner_t *dop, struct ds_storinfo *si) 645 { 646 pinfo_create_t pic_arg; 647 ds_guid_info_t *pgi; 648 649 bool_t create = FALSE; 650 651 pic_arg.ds_owner = dop; 652 pic_arg.si = si; 653 654 rw_enter(&mds_server->ds_guid_info_lock, RW_WRITER); 655 pgi = (ds_guid_info_t *)rfs4_dbsearch( 656 mds_server->ds_guid_info_idx, 657 (void *)&si->ds_storinfo_u.zfs_info.guid_map.ds_guid, 658 &create, (void *)&pic_arg, RFS4_DBS_VALID); 659 660 /* 661 * Get rid of the old one. 662 */ 663 if (pgi) { 664 rfs4_dbe_rele(pgi->dbe); 665 rfs4_dbe_invalidate(pgi->dbe); 666 } 667 668 pgi = (ds_guid_info_t *)rfs4_dbcreate( 669 mds_server->ds_guid_info_idx, 670 (void *)&pic_arg); 671 672 rw_exit(&mds_server->ds_guid_info_lock); 673 674 return (pgi); 675 } 676 677 char * 678 kstrdup(const char *s) 679 { 680 size_t len; 681 char *new; 682 683 len = strlen(s); 684 new = kmem_alloc(len + 1, KM_SLEEP); 685 bcopy(s, new, len); 686 new[len] = '\0'; 687 688 return (new); 689 } 690 691 static char * 692 uaddr_trunc_port(char *uaddr) 693 { 694 int pos, dc; 695 char *port_less = NULL; 696 697 if (uaddr == NULL) 698 return (NULL); 699 700 pos = strlen(uaddr); 701 702 for (dc = 2; pos > 0; pos--) { 703 if (uaddr[pos] == '.') { 704 dc--; 705 if (dc == 0) { 706 uaddr[pos] = 0; 707 port_less = kstrdup(uaddr); 708 uaddr[pos] = '.'; 709 break; 710 } 711 } 712 } 713 return (port_less); 714 } 715 716 ds_owner_t * 717 mds_find_ds_owner(DS_EXIBIargs *args, bool_t *create) 718 { 719 ds_owner_t *dop = NULL; 720 721 /* 722 * using the data-server identity string find 723 * the ds_owner structure 724 */ 725 rw_enter(&mds_server->ds_owner_lock, RW_WRITER); 726 dop = (ds_owner_t *)rfs4_dbsearch(mds_server->ds_owner_inst_idx, 727 (void *)args->ds_ident.instance.instance_val, 728 create, (void *)args, RFS4_DBS_VALID); 729 rw_exit(&mds_server->ds_owner_lock); 730 731 return (dop); 732 } 733 734 void 735 mds_ds_address_to_key(ds_addrlist_t *dp, int af) 736 { 737 int len; 738 int t; 739 740 char *address, *port; 741 742 address = kstrdup(dp->dev_addr.na_r_addr); 743 len = strlen(address) + 1; 744 745 /* 746 * These are network addresses + port information 747 * We don't care about the format for the network 748 * address, but we do care that the port is 749 * described by .NUM.NUM after it. 750 */ 751 port = strrchr(address, '.'); 752 if (!port) 753 goto error; 754 755 *port++ = '\0'; 756 t = stoi(&port); 757 dp->ds_port_key = t; 758 759 port = strrchr(address, '.'); 760 if (!port) 761 goto error; 762 763 *port++ = '\0'; 764 t = stoi(&port); 765 dp->ds_port_key |= t << 8; 766 767 t = inet_pton(af, address, &dp->ds_addr_key); 768 if (t != 1) 769 goto error; 770 771 kmem_free(address, len); 772 return; 773 774 error: 775 dp->ds_addr_key = 0; 776 dp->ds_port_key = 0; 777 778 kmem_free(address, len); 779 } 780 781 /* 782 * mds_ds_initnet builds a knetconfig structure for the 783 * netid, address and port. 784 */ 785 int 786 mds_ds_initnet(ds_addrlist_t *dp) 787 { 788 struct sockaddr_in *addr4; 789 struct sockaddr_in6 *addr6; 790 char *devname; 791 vnode_t *vp; 792 int error; 793 int af; 794 int newknc = 0, newnb = 0; 795 796 if (dp->dev_knc == NULL) { 797 newknc = 1; 798 dp->dev_knc = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP); 799 } 800 801 dp->dev_knc->knc_semantics = NC_TPI_COTS; 802 if (strcmp(dp->dev_addr.na_r_netid, "tcp") == 0) { 803 dp->dev_knc->knc_protofmly = "inet"; 804 dp->dev_knc->knc_proto = "tcp"; 805 devname = "/dev/tcp"; 806 af = AF_INET; 807 } else if (strcmp(dp->dev_addr.na_r_netid, "tcp6") == 0) { 808 dp->dev_knc->knc_protofmly = "inet6"; 809 dp->dev_knc->knc_proto = "tcp"; /* why not tcp6? */ 810 devname = "/dev/tcp6"; 811 af = AF_INET6; 812 } else { 813 error = EINVAL; 814 goto out; 815 } 816 817 error = lookupname(devname, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 818 if (error) 819 goto out; 820 if (vp->v_type != VCHR) { 821 VN_RELE(vp); 822 error = EINVAL; 823 goto out; 824 } 825 dp->dev_knc->knc_rdev = vp->v_rdev; 826 VN_RELE(vp); 827 828 if (dp->dev_nb == NULL) { 829 newnb = 1; 830 dp->dev_nb = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP); 831 } else if (dp->dev_nb->buf) 832 kmem_free(dp->dev_nb->buf, dp->dev_nb->maxlen); 833 834 if (af == AF_INET) { 835 dp->dev_nb->maxlen = dp->dev_nb->len = 836 sizeof (struct sockaddr_in); 837 dp->dev_nb->buf = kmem_zalloc(dp->dev_nb->maxlen, KM_SLEEP); 838 addr4 = (struct sockaddr_in *)dp->dev_nb->buf; 839 addr4->sin_family = af; 840 error = uaddr2sockaddr(af, dp->dev_addr.na_r_addr, 841 &addr4->sin_addr, &addr4->sin_port); 842 } else { /* AF_INET6 */ 843 dp->dev_nb->maxlen = dp->dev_nb->len = 844 sizeof (struct sockaddr_in6); 845 dp->dev_nb->buf = kmem_zalloc(dp->dev_nb->maxlen, KM_SLEEP); 846 addr6 = (struct sockaddr_in6 *)dp->dev_nb->buf; 847 addr6->sin6_family = af; 848 error = uaddr2sockaddr(af, dp->dev_addr.na_r_addr, 849 &addr6->sin6_addr, &addr6->sin6_port); 850 } 851 852 mds_ds_address_to_key(dp, af); 853 854 out: 855 if (error) { 856 if (newknc && dp->dev_knc) { 857 kmem_free(dp->dev_knc, sizeof (struct knetconfig)); 858 dp->dev_knc = NULL; 859 } 860 if (newnb && dp->dev_nb) { 861 if (dp->dev_nb->buf) 862 kmem_free(dp->dev_nb->buf, dp->dev_nb->maxlen); 863 kmem_free(dp->dev_nb, sizeof (struct netbuf)); 864 dp->dev_nb = NULL; 865 } 866 } 867 return (error); 868 } 869 870 void 871 mds_ds_addrlist_update(ds_owner_t *dop, struct ds_addr *dap) 872 { 873 struct mds_adddev_args darg; 874 bool_t create = FALSE; 875 ds_addrlist_t *dp; 876 ds_addrlist_t dp_map; 877 878 int af; 879 880 dp_map.dev_addr.na_r_addr = dap->addr.na_r_addr; 881 dp_map.dev_addr.na_r_netid = dap->addr.na_r_netid; 882 if (strcmp(dp_map.dev_addr.na_r_netid, "tcp") == 0) { 883 af = AF_INET; 884 } else if (strcmp(dp_map.dev_addr.na_r_netid, "tcp6") == 0) { 885 af = AF_INET6; 886 } else { 887 return; 888 } 889 890 mds_ds_address_to_key(&dp_map, af); 891 892 rw_enter(&mds_server->ds_addrlist_lock, RW_WRITER); 893 894 /* search for existing entry */ 895 dp = (ds_addrlist_t *)rfs4_dbsearch( 896 mds_server->ds_addrlist_addrkey_idx, 897 (void *)&dp_map.ds_addr_key, 898 &create, NULL, RFS4_DBS_VALID); 899 if (dp != NULL) { 900 /* 901 * XXX: Check to see that we are on the same port. 902 */ 903 goto done; 904 } 905 906 bzero(&darg, sizeof (darg)); 907 908 darg.dev_netid = dap->addr.na_r_netid; 909 darg.dev_addr = dap->addr.na_r_addr; 910 911 /* make it */ 912 dp = (ds_addrlist_t *)rfs4_dbcreate(mds_server->ds_addrlist_idx, 913 (void *)&darg); 914 if (dp == NULL) { 915 rw_exit(&mds_server->ds_addrlist_lock); 916 return; 917 } 918 919 dp->ds_owner = dop; 920 rfs4_dbe_hold(dop->dbe); 921 list_insert_tail(&dop->ds_addrlist_list, dp); 922 923 done: 924 925 MDS_SET_DS_FLAGS(dp->dev_flags, dap->validuse); 926 rw_exit(&mds_server->ds_addrlist_lock); 927 (void) mds_ds_initnet(dp); 928 929 rfs4_dbe_rele(dp->dbe); 930 } 931 932 /* ARGSUSED */ 933 void 934 ds_reportavail(DS_REPORTAVAILargs *argp, DS_REPORTAVAILres *resp, 935 struct svc_req *rqstp) 936 { 937 ds_owner_t *dop; 938 939 int i, j; 940 int count; 941 942 ds_guid_info_t *pgi; 943 struct ds_guid_map *guid_map; 944 DS_REPORTAVAILresok *res_ok; 945 946 ds_addr *dap; 947 948 mds_sid_content sid_content; 949 950 mds_sid *sid_array; 951 uint32_t sid_array_len; 952 953 /* 954 * data-server has no id so no soup for you. 955 */ 956 if (argp->ds_id == 0) { 957 resp->status = DSERR_INVAL; 958 return; 959 } 960 961 dop = mds_find_ds_owner_by_id(argp->ds_id); 962 if (dop == NULL) { 963 resp->status = DSERR_NOT_AUTH; 964 return; 965 } 966 967 /* 968 * ToDo: Check the verifier (args->ds_verifier). 969 */ 970 971 /* 972 * First deal with the universal addresses 973 */ 974 for (i = 0; i < argp->ds_addrs.ds_addrs_len; i++) { 975 dap = &argp->ds_addrs.ds_addrs_val[i]; 976 977 /* 978 * Create the entry and link it into the 979 * ds_owner's list of addrlist entries. 980 */ 981 mds_ds_addrlist_update(dop, dap); 982 } 983 984 res_ok = &(resp->DS_REPORTAVAILres_u.res_ok); 985 986 /* 987 * Set the attribute version so the data server knows which 988 * set of attributes the MDS knows about. Note: this is just 989 * ignored right now. 990 */ 991 res_ok->ds_attrvers = DS_ATTR_v1; 992 993 /* 994 * Now process the data store information. 995 * 996 * The number of entries in the response's guid_map is equal 997 * to the number of storage items the data server sent over 998 * with the REPORTAVAIL. We will only have a guid_map entry 999 * in the response for the storage items that were sent in 1000 * the arguments. 1001 */ 1002 guid_map = kmem_alloc(sizeof (struct ds_guid_map) * 1003 argp->ds_storinfo.ds_storinfo_len, KM_SLEEP); 1004 1005 count = 0; 1006 for (i = 0; i < argp->ds_storinfo.ds_storinfo_len; i++) { 1007 /* 1008 * Note: If we find an pre-existing entry, we 1009 * will delete it and create a new one. 1010 * If there is then an error, then the old 1011 * entry will still be unavailable. 1012 */ 1013 pgi = ds_guid_info_add(dop, 1014 &argp->ds_storinfo.ds_storinfo_val[i]); 1015 if (pgi == NULL) { 1016 continue; 1017 } 1018 1019 /* Data Server GUIDs */ 1020 /* Only supported type is ZFS */ 1021 ASSERT(pgi->ds_guid.stor_type == ZFS); 1022 guid_map[count].ds_guid = pgi->ds_guid; 1023 1024 ASSERT(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len 1025 == sizeof (mds_sid_content)); 1026 1027 /* 1028 * MDS SIDs: these would come from the mds_mapzap, 1029 * but for now we just reuse the ds_guid 1030 * 1031 * Note that whatever is used as the MDS SID 1032 * can not just be the ZFS id for the root fileset 1033 * as a mds could have multiple root filesets... 1034 */ 1035 bcopy(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val, 1036 &sid_content, sizeof (sid_content)); 1037 1038 /* 1039 * There is only one MDS SID associated with this 1040 * DS GUID 1041 * 1042 * XXX: But as we move a dataset to a new DS, 1043 * we might have multiple MDS SIDs. 1044 */ 1045 guid_map[count].mds_sid_array.mds_sid_array_len = 1046 sid_array_len = 1; 1047 guid_map[count].mds_sid_array.mds_sid_array_val = 1048 sid_array = 1049 kmem_zalloc(sid_array_len * sizeof (mds_sid), 1050 KM_SLEEP); 1051 1052 /* 1053 * Note, since we stuff the xdr_buffer into the 1054 * sid_array, we never explicitly free it by name! 1055 */ 1056 sid_array[0].len = pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len; 1057 sid_array[0].val = kmem_alloc(sid_array[0].len, KM_SLEEP); 1058 bcopy(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val, 1059 sid_array[0].val, sid_array[0].len); 1060 count++; 1061 1062 rfs4_dbe_rele(pgi->dbe); 1063 } 1064 1065 if (count) { 1066 res_ok->guid_map.guid_map_len = count; 1067 res_ok->guid_map.guid_map_val = guid_map; 1068 1069 /* 1070 * XXX: Once we finish kspe with the SMF work, 1071 * we will end up pulling this out! 1072 */ 1073 rw_enter(&mds_server->ds_guid_info_lock, RW_WRITER); 1074 mds_server->ds_guid_info_count += count; 1075 rw_exit(&mds_server->ds_guid_info_lock); 1076 } else { 1077 res_ok->guid_map.guid_map_len = 0; 1078 res_ok->guid_map.guid_map_val = NULL; 1079 } 1080 1081 /* 1082 * Make sure we set the bit that we've seen a DS check in 1083 */ 1084 if (nfs_ds_present == 0) 1085 nfs_ds_present = 1; 1086 1087 rfs4_dbe_rele(dop->dbe); /* search */ 1088 1089 resp->status = DS_OK; 1090 1091 return; 1092 1093 cleanup: 1094 1095 for (i = 0; i < count; i++) { 1096 sid_array = guid_map[i].mds_sid_array.mds_sid_array_val; 1097 sid_array_len = guid_map[i].mds_sid_array.mds_sid_array_len; 1098 1099 pgi = mds_find_ds_guid_info_by_id(&guid_map[i].ds_guid); 1100 if (pgi != NULL) { 1101 for (j = 0; j < sid_array_len; j++) { 1102 kmem_free(sid_array[j].val, 1103 sid_array[j].len); 1104 } 1105 1106 kmem_free(sid_array, 1107 sid_array_len * sizeof (mds_sid)); 1108 1109 rfs4_dbe_rele(pgi->dbe); /* search */ 1110 1111 list_remove(&pgi->ds_owner->ds_guid_list, pgi); 1112 rfs4_dbe_rele(pgi->ds_owner->dbe); 1113 1114 rfs4_dbe_invalidate(pgi->dbe); 1115 } 1116 } 1117 1118 kmem_free(guid_map, sizeof (struct ds_guid_map) * 1119 argp->ds_storinfo.ds_storinfo_len); 1120 1121 rfs4_dbe_rele(dop->dbe); /* search */ 1122 } 1123 1124 /* 1125 * XXX: 1126 * XXX: Needs to have nfs_server_instance passed in to it.. 1127 * XXX: 1128 */ 1129 /* ARGSUSED */ 1130 void 1131 ds_exchange(DS_EXIBIargs *argp, DS_EXIBIres *resp, struct svc_req *rqstp) 1132 { 1133 /* 1134 * XXX: This will go away with the SMF work! 1135 */ 1136 extern void mds_nuke_layout(nfs_server_instance_t *, uint32_t); 1137 1138 int lo_id; 1139 1140 ds_owner_t *dop; 1141 ds_addrlist_t *dp; 1142 ds_guid_info_t *pgi; 1143 bool_t do_create = TRUE; 1144 DS_EXIBIresok *dser = &(resp->DS_EXIBIres_u.res_ok); 1145 1146 unsigned long hostid = 0; 1147 1148 /* 1149 * Do some initial validation of the request. 1150 */ 1151 if (argp->ds_ident.boot_verifier == 0 || 1152 argp->ds_ident.instance.instance_len == 0) { 1153 resp->status = DSERR_INVAL; 1154 return; 1155 } 1156 1157 dop = mds_find_ds_owner(argp, &do_create); 1158 ASSERT(dop); 1159 if (!dop) { 1160 resp->status = DSERR_NOENT; 1161 return; 1162 } 1163 1164 /* 1165 * We found a match, now we need to see if it is the same 1166 * instance as before! 1167 */ 1168 if (do_create == FALSE) { 1169 /* 1170 * Only if the verifiers differ should we assume 1171 * a reboot. 1172 */ 1173 if (argp->ds_ident.boot_verifier != dop->verifier) { 1174 /* brute force it */ 1175 DTRACE_PROBE(nfssrv__i__dscp_freeing_device_entries); 1176 rw_enter(&mds_server->ds_addrlist_lock, RW_WRITER); 1177 while (dp = list_head(&dop->ds_addrlist_list)) { 1178 list_remove(&dop->ds_addrlist_list, dp); 1179 dp->ds_owner = NULL; 1180 rfs4_dbe_rele(dop->dbe); 1181 rfs4_dbe_invalidate(dp->dbe); 1182 } 1183 rw_exit(&mds_server->ds_addrlist_lock); 1184 1185 rw_enter(&mds_server->ds_guid_info_lock, RW_WRITER); 1186 while (pgi = list_head(&dop->ds_guid_list)) { 1187 list_remove(&dop->ds_guid_list, pgi); 1188 1189 /* 1190 * XXX: Hack alert! 1191 */ 1192 ASSERT(mds_server->ds_guid_info_count > 0); 1193 mds_server->ds_guid_info_count--; 1194 1195 pgi->ds_owner = NULL; 1196 rfs4_dbe_rele(dop->dbe); 1197 rfs4_dbe_invalidate(pgi->dbe); 1198 } 1199 rw_exit(&mds_server->ds_guid_info_lock); 1200 } 1201 1202 /* 1203 * XXX: This stuff needs to give way to something 1204 * smarter. 1205 */ 1206 rw_enter(&mds_server->mds_layout_lock, RW_WRITER); 1207 lo_id = mds_server->mds_layout_default_idx; 1208 mds_server->mds_layout_default_idx = 0; 1209 rw_exit(&mds_server->mds_layout_lock); 1210 1211 mds_nuke_layout(mds_server, lo_id); 1212 } 1213 1214 /* 1215 * XXXX: This would be a good place to notice the 1216 * XXXX: data-server has rebooted and we need to 1217 * XXXX: trash/invalidate/recall associated 1218 * XXXX: state.. of course the device information 1219 * XXXX: may have not changed (but ds_verifier would have) 1220 * XXXX: Hmmm..perhaps the correct place is in ds_reportavail 1221 * XXXX: when we notice an update (as opposed to add) 1222 * 1223 * XXX: But we wouldn't notice an update because we just 1224 * emptied the addrlist above. 1225 */ 1226 resp->status = DS_OK; 1227 dser->ds_id = dop->ds_id; 1228 1229 (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); 1230 dser->mds_id = (uint64_t)hostid; 1231 dser->mds_boot_verifier = mds_server->Write4verf; 1232 dser->mds_lease_period = mds_server->lease_period; 1233 1234 rfs4_dbe_rele(dop->dbe); /* create/search */ 1235 } 1236 1237 void 1238 nfs_ds_cp_dispatch(struct svc_req *req, SVCXPRT *xprt) 1239 { 1240 rpcproc_t which; 1241 union nfs_ds_cp_sarg darg; 1242 union nfs_ds_cp_sres dres; 1243 struct nfs_cp_disp *disp; 1244 1245 /* 1246 * validate version and procedure 1247 */ 1248 if (req->rq_vers != PNFSCTLDS_V1) { 1249 svcerr_progvers(req->rq_xprt, PNFSCTLDS_V1, PNFSCTLDS_V1); 1250 DTRACE_PROBE2(nfssrv__e__dscp__badvers, rpcvers_t, req->rq_vers, 1251 rpcvers_t, PNFSCTLDS_V1); 1252 return; 1253 } 1254 1255 which = req->rq_proc; 1256 if (which < 0 || which >= NFS_CP_ILLEGAL_PROC) { 1257 svcerr_noproc(req->rq_xprt); 1258 DTRACE_PROBE1(nfssrv__e__dscp__badproc, rpcproc_t, which); 1259 return; 1260 } 1261 1262 1263 /* RPC NULL is the zero proc, so short circuit */ 1264 if (which == 0) { 1265 (void) svc_sendreply(xprt, xdr_void, NULL); 1266 return; 1267 } 1268 1269 disp = &nfs_ds_cp_v1[which]; 1270 1271 /* 1272 * decode args 1273 */ 1274 bzero(&darg, sizeof (union nfs_ds_cp_sarg)); 1275 if (!SVC_GETARGS(xprt, disp->decode_args, (char *)&darg)) { 1276 svcerr_decode(xprt); 1277 DTRACE_PROBE2(nfssrv__e__dscp__decode, rpcvers_t, req->rq_vers, 1278 rpcproc_t, which); 1279 return; 1280 } 1281 1282 (*disp->proc)(&darg, &dres, req); 1283 1284 /* 1285 * encode result 1286 */ 1287 if (!svc_sendreply(xprt, disp->encode_reply, (char *)&dres)) { 1288 DTRACE_PROBE2(nfssrv__e__dscp__sendreply, 1289 rpcvers_t, req->rq_vers, rpcproc_t, which); 1290 svcerr_systemerr(xprt); 1291 } 1292 1293 /* 1294 * free results 1295 */ 1296 if (disp->resfree) { 1297 (*disp->resfree)(&dres); 1298 } 1299 1300 /* 1301 * free arguments 1302 */ 1303 if (!SVC_FREEARGS(xprt, disp->decode_args, (char *)&darg)) { 1304 DTRACE_PROBE2(nfssrv__e__svc__freeargs, rpcvers_t, req->rq_vers, 1305 rpcproc_t, which); 1306 } 1307 } 1308