1 6741 th199096 /* 2 6741 th199096 * CDDL HEADER START 3 6741 th199096 * 4 6741 th199096 * The contents of this file are subject to the terms of the 5 6741 th199096 * Common Development and Distribution License (the "License"). 6 6741 th199096 * You may not use this file except in compliance with the License. 7 6741 th199096 * 8 6741 th199096 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 6741 th199096 * or http://www.opensolaris.org/os/licensing. 10 6741 th199096 * See the License for the specific language governing permissions 11 6741 th199096 * and limitations under the License. 12 6741 th199096 * 13 6741 th199096 * When distributing Covered Code, include this CDDL HEADER in each 14 6741 th199096 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 6741 th199096 * If applicable, add the following below this CDDL HEADER, with the 16 6741 th199096 * fields enclosed by brackets "[]" replaced with your own identifying 17 6741 th199096 * information: Portions Copyright [yyyy] [name of copyright owner] 18 6741 th199096 * 19 6741 th199096 * CDDL HEADER END 20 6741 th199096 */ 21 6741 th199096 /* 22 8422 James * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 6741 th199096 * Use is subject to license terms. 
24 6741 th199096 */ 25 6741 th199096 26 6741 th199096 #include <sys/flock.h> 27 6741 th199096 #include <nfs/export.h> 28 6741 th199096 #include <sys/cmn_err.h> 29 6741 th199096 #include <sys/atomic.h> 30 6741 th199096 #include <nfs/nfs.h> 31 6741 th199096 #include <nfs/nfs4.h> 32 6741 th199096 #include <nfs/nfssys.h> 33 6741 th199096 #include <nfs/lm.h> 34 6741 th199096 #include <sys/pathname.h> 35 6741 th199096 #include <sys/sdt.h> 36 6741 th199096 #include <sys/nvpair.h> 37 6741 th199096 #include <sys/sdt.h> 38 6741 th199096 #include <sys/disp.h> 39 10016 Thomas #include <sys/id_space.h> 40 6741 th199096 41 6741 th199096 extern u_longlong_t nfs4_srv_caller_id; 42 6741 th199096 43 7739 jwahlig #include <nfs/nfs_sstor_impl.h> 44 6741 th199096 #include <nfs/mds_state.h> 45 6741 th199096 #include <nfs/nfs41_sessions.h> 46 6741 th199096 47 6741 th199096 #include <nfs/nfs41_filehandle.h> 48 6741 th199096 49 10016 Thomas #include <nfs/spe_impl.h> 50 10016 Thomas 51 6741 th199096 static void mds_do_lorecall(mds_lorec_t *); 52 6741 th199096 static int mds_lorecall_cmd(struct mds_reclo_args *, cred_t *); 53 8312 webaker static int mds_notify_device_cmd(struct mds_notifydev_args *, cred_t *); 54 6741 th199096 55 7739 jwahlig extern void mds_do_cb_recall(struct rfs4_deleg_state *, bool_t); 56 6741 th199096 57 6741 th199096 /* 58 6741 th199096 * XXX - slrc_slot_size will more than likely have to be 59 6741 th199096 * computed dynamically as the server adjusts the 60 6741 th199096 * sessions' slot replay cache size. This should be 61 6741 th199096 * good for proto. 
62 6741 th199096 */ 63 6741 th199096 slotid4 slrc_slot_size = MAXSLOTS; 64 10475 rick slotid4 bc_slot_tab = 0; /* backchan slots are set by client */ 65 6741 th199096 66 6741 th199096 /* The values below are rfs4_lease_time units */ 67 6741 th199096 68 6741 th199096 #ifdef DEBUG 69 6741 th199096 #define SESSION_CACHE_TIME 1 70 6741 th199096 #else 71 6741 th199096 #define SESSION_CACHE_TIME 10 72 6741 th199096 #endif 73 6741 th199096 74 6741 th199096 #define ONES_64 (0xFFFFFFFFFFFFFFFFuLL) 75 6741 th199096 76 6741 th199096 /* Sessions */ 77 6741 th199096 static void mds_session_destroy(rfs4_entry_t); 78 6741 th199096 static bool_t mds_session_expiry(rfs4_entry_t); 79 7739 jwahlig static bool_t mds_session_create(rfs4_entry_t, void *); 80 6741 th199096 static uint32_t sessid_hash(void *); 81 6741 th199096 static bool_t sessid_compare(rfs4_entry_t, void *); 82 6741 th199096 static void *sessid_mkkey(rfs4_entry_t); 83 6741 th199096 84 8312 webaker /* function pointers for mdsadm */ 85 8312 webaker 86 6741 th199096 extern int (*mds_recall_lo)(struct mds_reclo_args *, cred_t *); 87 8312 webaker extern int (*mds_notify_device)(struct mds_notifydev_args *, cred_t *); 88 6741 th199096 89 6741 th199096 extern char *kstrdup(const char *); 90 6741 th199096 91 6741 th199096 extern rfs4_client_t *findclient(nfs_server_instance_t *, nfs_client_id4 *, 92 6741 th199096 bool_t *, rfs4_client_t *); 93 6741 th199096 94 6741 th199096 extern rfs4_client_t *findclient_by_id(nfs_server_instance_t *, clientid4); 95 6741 th199096 96 6741 th199096 extern rfs4_openowner_t *findopenowner(nfs_server_instance_t *, open_owner4 *, 97 6741 th199096 bool_t *, seqid4); 98 6741 th199096 99 7739 jwahlig extern void v4prot_sstor_init(nfs_server_instance_t *); 100 7739 jwahlig 101 7739 jwahlig extern void rfs4_ss_retrieve_state(nfs_server_instance_t *); 102 7739 jwahlig extern int nfs_doorfd; 103 6741 th199096 104 6741 th199096 #ifdef DEBUG 105 6741 th199096 #define MDS_TABSIZE 17 106 6741 th199096 #else 
107 6741 th199096 #define MDS_TABSIZE 2047 108 6741 th199096 #endif 109 6741 th199096 110 6741 th199096 #define MDS_MAXTABSZ 1024*1024 111 6741 th199096 112 6741 th199096 extern uint32_t clientid_hash(void *); 113 7739 jwahlig 114 7739 jwahlig /* 115 7739 jwahlig * Returns the instances capabilities flag word 116 7739 jwahlig * the form of: 117 7739 jwahlig * 118 7739 jwahlig * EXCHGID4_FLAG_USE_NON_PNFS 119 7739 jwahlig * EXCHGID4_FLAG_USE_PNFS_MDS 120 7739 jwahlig * EXCHGID4_FLAG_USE_PNFS_DS 121 7739 jwahlig * 122 7739 jwahlig */ 123 7739 jwahlig uint32_t 124 7739 jwahlig mds_get_capabilities(nfs_server_instance_t *instp) 125 7739 jwahlig { 126 7739 jwahlig uint32_t my_abilities = 0; 127 7739 jwahlig 128 7739 jwahlig if (instp) 129 7739 jwahlig my_abilities = 130 7739 jwahlig instp->inst_flags & EXCHGID4_FLAG_MASK_PNFS; 131 7739 jwahlig return (my_abilities); 132 7739 jwahlig } 133 7739 jwahlig 134 6741 th199096 135 6741 th199096 /*ARGSUSED*/ 136 6741 th199096 static bool_t 137 6741 th199096 mds_do_not_expire(rfs4_entry_t u_entry) 138 6741 th199096 { 139 6741 th199096 return (FALSE); 140 6741 th199096 } 141 6741 th199096 142 6741 th199096 /*ARGSUSED*/ 143 6741 th199096 static stateid_t 144 7739 jwahlig mds_create_stateid(rfs4_dbe_t *dbe, stateid_type_t id_type) 145 6741 th199096 { 146 6741 th199096 stateid_t id; 147 6741 th199096 148 7739 jwahlig id.v41_bits.boottime = dbe_to_instp(dbe)->start_time; 149 6741 th199096 id.v41_bits.state_ident = rfs4_dbe_getid(dbe); 150 8422 James id.v41_bits.chgseq = 0; 151 6741 th199096 id.v41_bits.type = id_type; 152 6741 th199096 id.v41_bits.pid = 0; 153 6741 th199096 154 6741 th199096 return (id); 155 6741 th199096 } 156 6741 th199096 157 6741 th199096 158 6741 th199096 rfs4_openowner_t * 159 7739 jwahlig mds_findopenowner(nfs_server_instance_t *instp, open_owner4 *openowner, 160 7739 jwahlig bool_t *create) 161 6741 th199096 { 162 10447 Thomas rfs4_openowner_t *oo; 163 6741 th199096 rfs4_openowner_t arg; 164 6741 th199096 165 
10447 Thomas arg.ro_owner = *openowner; 166 10447 Thomas arg.ro_open_seqid = 0; 167 10447 Thomas oo = (rfs4_openowner_t *)rfs4_dbsearch(instp->openowner_idx, 168 6741 th199096 openowner, create, &arg, RFS4_DBS_VALID); 169 10447 Thomas return (oo); 170 6741 th199096 } 171 6741 th199096 172 6741 th199096 rfs4_lo_state_t * 173 6741 th199096 mds_findlo_state_by_owner(rfs4_lockowner_t *lo, 174 6741 th199096 rfs4_state_t *sp, bool_t *create) 175 6741 th199096 { 176 6741 th199096 rfs4_lo_state_t *lsp; 177 6741 th199096 rfs4_lo_state_t arg; 178 7739 jwahlig nfs_server_instance_t *instp; 179 6741 th199096 180 10447 Thomas arg.rls_locker = lo; 181 10447 Thomas arg.rls_state = sp; 182 10447 Thomas 183 10447 Thomas instp = dbe_to_instp(lo->rl_dbe); 184 7739 jwahlig 185 7739 jwahlig lsp = (rfs4_lo_state_t *)rfs4_dbsearch(instp->lo_state_owner_idx, 186 6741 th199096 &arg, create, &arg, RFS4_DBS_VALID); 187 6741 th199096 188 6741 th199096 return (lsp); 189 6741 th199096 } 190 6741 th199096 191 10447 Thomas /* XXX: well clearly this needs to be cleaned up.. 
*/ 192 6741 th199096 typedef union { 193 6741 th199096 struct { 194 6741 th199096 uint32_t start_time; 195 6741 th199096 uint32_t c_id; 196 6741 th199096 } impl_id; 197 6741 th199096 clientid4 id4; 198 6741 th199096 } cid; 199 6741 th199096 200 6741 th199096 int 201 6741 th199096 mds_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid) 202 6741 th199096 { 203 6741 th199096 stateid_t *id = (stateid_t *)stateid; 204 6741 th199096 205 10447 Thomas if (rfs4_lease_expired(sp->rs_owner->ro_client)) 206 6741 th199096 return (NFS4_CHECK_STATEID_EXPIRED); 207 6741 th199096 208 6741 th199096 /* Stateid is some time in the future - that's bad */ 209 10447 Thomas if (sp->rs_stateid.v41_bits.chgseq < id->v41_bits.chgseq) 210 6741 th199096 return (NFS4_CHECK_STATEID_BAD); 211 6741 th199096 212 10447 Thomas if (sp->rs_closed == TRUE) 213 6741 th199096 return (NFS4_CHECK_STATEID_CLOSED); 214 6741 th199096 215 6741 th199096 return (NFS4_CHECK_STATEID_OKAY); 216 6741 th199096 } 217 6741 th199096 218 6741 th199096 int 219 6741 th199096 mds_fh_is_exi(struct exportinfo *exi, nfs41_fh_fmt_t *fhp) 220 6741 th199096 { 221 6741 th199096 if (exi->exi_fid.fid_len != fhp->fh.v1.export_fid.len) 222 6741 th199096 return (0); 223 6741 th199096 224 6741 th199096 if (bcmp(exi->exi_fid.fid_data, fhp->fh.v1.export_fid.val, 225 6741 th199096 fhp->fh.v1.export_fid.len) != 0) 226 6741 th199096 return (0); 227 6741 th199096 228 6741 th199096 if (exi->exi_fsid.val[0] != fhp->fh.v1.export_fsid.val[0] || 229 6741 th199096 exi->exi_fsid.val[1] != fhp->fh.v1.export_fsid.val[1]) 230 6741 th199096 return (0); 231 6741 th199096 232 6741 th199096 return (1); 233 6741 th199096 } 234 6741 th199096 235 6741 th199096 /* 236 6741 th199096 * This function is used as a target for the rfs4_dbe_walk() call 237 6741 th199096 * below. The purpose of this function is to see if the 238 6741 th199096 * lockowner_state refers to a file that resides within the exportinfo 239 6741 th199096 * export. 
If so, then remove the lock_owner state (file locks and
 * share "locks") for this object since the intent is the server is
 * unexporting the specified directory.  Once the state has been
 * released, both the lock-owner state entry and the open state entry
 * it hangs off are invalidated.
 *
 *	u_entry		the rfs4_lo_state_t being visited
 *	e		the struct exportinfo being unexported
 */
void
mds_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
{
	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
	nfs41_fh_fmt_t *fhp = (nfs41_fh_fmt_t *)
	    lsp->rls_state->rs_finfo->rf_filehandle.nfs_fh4_val;

	/* Files outside the export keep their state */
	if (!mds_fh_is_exi((struct exportinfo *)e, fhp))
		return;

	rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
	rfs4_dbe_invalidate(lsp->rls_dbe);
	rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
}

/*
 * This function is used as a target for the rfs4_dbe_walk() call
 * below.  The purpose of this function is to see if the state refers
 * to a file that resides within the exportinfo export.  If so, then
 * remove the open state for this object since the intent is the
 * server is unexporting the specified directory.  The main result for
 * this type of entry is to invalidate it such it will not be found in
 * the future.
269 6741 th199096 */ 270 6741 th199096 void 271 6741 th199096 mds_state_walk_callout(rfs4_entry_t u_entry, void *e) 272 6741 th199096 { 273 6741 th199096 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 274 6741 th199096 struct exportinfo *exi = (struct exportinfo *)e; 275 6741 th199096 nfs41_fh_fmt_t *fhp; 276 6741 th199096 277 6741 th199096 fhp = 278 10447 Thomas (nfs41_fh_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val; 279 6741 th199096 280 6741 th199096 if (mds_fh_is_exi(exi, fhp)) { 281 6741 th199096 rfs4_state_close(sp, TRUE, FALSE, CRED()); 282 10447 Thomas rfs4_dbe_invalidate(sp->rs_dbe); 283 6741 th199096 } 284 6741 th199096 } 285 6741 th199096 286 6741 th199096 /* 287 6741 th199096 * This function is used as a target for the rfs4_dbe_walk() call 288 6741 th199096 * below. The purpose of this function is to see if the state refers 289 6741 th199096 * to a file that resides within the exportinfo export. If so, then 290 6741 th199096 * remove the deleg state for this object since the intent is the 291 6741 th199096 * server is unexporting the specified directory. The main result for 292 6741 th199096 * this type of entry is to invalidate it such it will not be found in 293 6741 th199096 * the future. 294 6741 th199096 */ 295 6741 th199096 void 296 6741 th199096 mds_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e) 297 6741 th199096 { 298 6741 th199096 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 299 6741 th199096 struct exportinfo *exi = (struct exportinfo *)e; 300 6741 th199096 nfs41_fh_fmt_t *fhp; 301 6741 th199096 302 6741 th199096 fhp = 303 10447 Thomas (nfs41_fh_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val; 304 6741 th199096 305 6741 th199096 if (mds_fh_is_exi(exi, fhp)) { 306 10447 Thomas rfs4_dbe_invalidate(dsp->rds_dbe); 307 6741 th199096 } 308 6741 th199096 } 309 6741 th199096 310 6741 th199096 /* 311 6741 th199096 * This function is used as a target for the rfs4_dbe_walk() call 312 6741 th199096 * below. 
The purpose of this function is to see if the state refers 313 6741 th199096 * to a file that resides within the exportinfo export. If so, then 314 6741 th199096 * release vnode hold for this object since the intent is the server 315 6741 th199096 * is unexporting the specified directory. Invalidation will prevent 316 6741 th199096 * this struct from being found in the future. 317 6741 th199096 */ 318 6741 th199096 void 319 6741 th199096 mds_file_walk_callout(rfs4_entry_t u_entry, void *e) 320 6741 th199096 { 321 6741 th199096 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 322 6741 th199096 struct exportinfo *exi = (struct exportinfo *)e; 323 6741 th199096 nfs41_fh_fmt_t *fhp; 324 6741 th199096 vnode_t *vp; 325 7739 jwahlig nfs_server_instance_t *instp; 326 6741 th199096 327 10447 Thomas fhp = (nfs41_fh_fmt_t *)fp->rf_filehandle.nfs_fh4_val; 328 6741 th199096 329 6741 th199096 if (mds_fh_is_exi(exi, fhp) == 0) 330 6741 th199096 return; 331 6741 th199096 332 10447 Thomas if ((vp = fp->rf_vp) != NULL) { 333 10447 Thomas instp = dbe_to_instp(fp->rf_dbe); 334 7739 jwahlig ASSERT(instp); 335 9404 Thomas 336 6741 th199096 /* 337 6741 th199096 * don't leak monitors and remove the reference 338 6741 th199096 * put on the vnode when the delegation was granted. 
339 6741 th199096 */ 340 10447 Thomas if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_READ) { 341 7739 jwahlig (void) fem_uninstall(vp, instp->deleg_rdops, 342 6741 th199096 (void *)fp); 343 6741 th199096 vn_open_downgrade(vp, FREAD); 344 10447 Thomas } else if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) { 345 7739 jwahlig (void) fem_uninstall(vp, instp->deleg_wrops, 346 6741 th199096 (void *)fp); 347 6741 th199096 vn_open_downgrade(vp, FREAD|FWRITE); 348 6741 th199096 } 349 9404 Thomas 350 6741 th199096 mutex_enter(&vp->v_lock); 351 7739 jwahlig (void) vsd_set(vp, instp->vkey, NULL); 352 6741 th199096 mutex_exit(&vp->v_lock); 353 6741 th199096 VN_RELE(vp); 354 10447 Thomas fp->rf_vp = NULL; 355 10447 Thomas } 356 10447 Thomas 357 10447 Thomas rfs4_dbe_invalidate(fp->rf_dbe); 358 6741 th199096 } 359 6741 th199096 360 6741 th199096 /* 361 6741 th199096 * -------------------------------------------------------- 362 6741 th199096 * MDS - NFSv4.1 Sessions 363 6741 th199096 * -------------------------------------------------------- 364 6741 th199096 */ 365 6741 th199096 static uint32_t 366 6741 th199096 sessid_hash(void *key) 367 6741 th199096 { 368 6741 th199096 sid *idp = key; 369 6741 th199096 370 6741 th199096 return (idp->impl_id.s_id); 371 6741 th199096 } 372 6741 th199096 373 6741 th199096 static bool_t 374 6741 th199096 sessid_compare(rfs4_entry_t entry, void *key) 375 6741 th199096 { 376 10448 Thomas mds_session_t *sp = (mds_session_t *)entry; 377 6741 th199096 sessionid4 *idp = (sessionid4 *)key; 378 6741 th199096 379 10448 Thomas return (bcmp(idp, &sp->sn_sessid, sizeof (sessionid4)) == 0); 380 6741 th199096 } 381 6741 th199096 382 6741 th199096 static void * 383 6741 th199096 sessid_mkkey(rfs4_entry_t entry) 384 6741 th199096 { 385 10448 Thomas mds_session_t *sp = (mds_session_t *)entry; 386 10448 Thomas 387 10448 Thomas return (&sp->sn_sessid); 388 6741 th199096 } 389 6741 th199096 390 6741 th199096 static bool_t 391 6741 th199096 
sess_clid_compare(rfs4_entry_t entry, void *key) 392 6741 th199096 { 393 10448 Thomas mds_session_t *sp = (mds_session_t *)entry; 394 6741 th199096 clientid4 *idp = key; 395 6741 th199096 396 10448 Thomas return (*idp == sp->sn_clnt->rc_clientid); 397 6741 th199096 } 398 6741 th199096 399 6741 th199096 static void * 400 6741 th199096 sess_clid_mkkey(rfs4_entry_t entry) 401 6741 th199096 { 402 10447 Thomas return (&(((mds_session_t *)entry)->sn_clnt->rc_clientid)); 403 10447 Thomas } 404 10447 Thomas 405 10447 Thomas void 406 10448 Thomas rfs41_session_rele(mds_session_t *sp) 407 10448 Thomas { 408 10448 Thomas rfs4_dbe_rele(sp->sn_dbe); 409 6741 th199096 } 410 6741 th199096 411 6741 th199096 mds_session_t * 412 7739 jwahlig mds_findsession_by_id(nfs_server_instance_t *instp, sessionid4 sessid) 413 6741 th199096 { 414 10448 Thomas mds_session_t *sp; 415 7739 jwahlig rfs4_index_t *idx = instp->mds_session_idx; 416 6741 th199096 bool_t create = FALSE; 417 6741 th199096 418 7739 jwahlig rw_enter(&instp->findsession_lock, RW_READER); 419 10448 Thomas sp = (mds_session_t *)rfs4_dbsearch(idx, sessid, &create, NULL, 420 6741 th199096 RFS4_DBS_VALID); 421 7739 jwahlig rw_exit(&instp->findsession_lock); 422 6741 th199096 423 10448 Thomas return (sp); 424 6741 th199096 } 425 6741 th199096 426 6741 th199096 mds_session_t * 427 7739 jwahlig mds_findsession_by_clid(nfs_server_instance_t *instp, clientid4 clid) 428 6741 th199096 { 429 10448 Thomas mds_session_t *sp; 430 6741 th199096 bool_t create = FALSE; 431 6741 th199096 432 7739 jwahlig rw_enter(&instp->findsession_lock, RW_READER); 433 10448 Thomas sp = (mds_session_t *)rfs4_dbsearch(instp->mds_sess_clientid_idx, &clid, 434 6741 th199096 &create, NULL, RFS4_DBS_VALID); 435 7739 jwahlig rw_exit(&instp->findsession_lock); 436 6741 th199096 437 10448 Thomas return (sp); 438 6741 th199096 } 439 6741 th199096 440 6741 th199096 /* 441 6741 th199096 * A clientid can have multiple sessions associated with it. 
Hence, 442 6741 th199096 * performing a raw 'mds_findsession' (even for a create) might 443 6741 th199096 * yield a list of sessions associated with the clientid in question. 444 6741 th199096 * Instead of delving deep into the rfs4_dbsearch engine to correct 445 6741 th199096 * this now, we'll call our function directly and create an association 446 6741 th199096 * between the session table and both primary (sessionid) index and 447 6741 th199096 * secondary (clientid) index for the newly created session. 448 6741 th199096 */ 449 6741 th199096 mds_session_t * 450 7739 jwahlig mds_createsession(nfs_server_instance_t *instp, session41_create_t *ap) 451 6741 th199096 { 452 10448 Thomas mds_session_t *sp = NULL; 453 7739 jwahlig rfs4_index_t *idx = instp->mds_session_idx; 454 6741 th199096 455 7739 jwahlig rw_enter(&instp->findsession_lock, RW_WRITER); 456 10448 Thomas if ((sp = (mds_session_t *)rfs4_dbcreate(idx, (void *)ap)) == NULL) { 457 6741 th199096 DTRACE_PROBE1(mds__srv__createsession__fail, 458 6741 th199096 session41_create_t *, ap); 459 6741 th199096 } 460 7739 jwahlig rw_exit(&instp->findsession_lock); 461 10448 Thomas return (sp); 462 6741 th199096 } 463 6741 th199096 464 6741 th199096 /* 465 7397 rick * mds_session_inval invalidates the session so other 466 7397 rick * threads won't "find" the session to place additional 467 7397 rick * callbacks. Destroy session even if no backchannel has 468 7397 rick * been established. 
469 6741 th199096 */ 470 7397 rick nfsstat4 471 10448 Thomas mds_session_inval(mds_session_t *sp) 472 6741 th199096 { 473 7397 rick nfsstat4 status; 474 6741 th199096 475 10448 Thomas ASSERT(sp != NULL); 476 10448 Thomas ASSERT(rfs4_dbe_islocked(sp->sn_dbe)); 477 10448 Thomas 478 10448 Thomas if (SN_CB_CHAN_EST(sp)) { 479 10448 Thomas sess_channel_t *bcp = sp->sn_back; 480 6741 th199096 sess_bcsd_t *bsdp; 481 6741 th199096 482 6741 th199096 rw_enter(&bcp->cn_lock, RW_READER); 483 6741 th199096 if ((bsdp = CTOBSD(bcp)) == NULL) 484 6741 th199096 cmn_err(CE_PANIC, "mds_session_inval: BCSD Not Set"); 485 6741 th199096 486 7813 rick rw_enter(&bsdp->bsd_rwlock, RW_READER); 487 7397 rick status = bsdp->bsd_stat = slot_cb_status(bsdp->bsd_stok); 488 7813 rick rw_exit(&bsdp->bsd_rwlock); 489 6741 th199096 490 6741 th199096 rw_exit(&bcp->cn_lock); 491 6741 th199096 } else { 492 6741 th199096 cmn_err(CE_NOTE, "No back chan established"); 493 7397 rick status = NFS4_OK; 494 6741 th199096 } 495 7813 rick 496 7813 rick /* only invalidate sess if no bc traffic */ 497 7813 rick if (status == NFS4_OK) 498 10448 Thomas rfs4_dbe_invalidate(sp->sn_dbe); 499 7813 rick 500 7397 rick return (status); 501 6741 th199096 } 502 6741 th199096 503 6741 th199096 /* 504 6741 th199096 * 1) Invalidate the session in the DB (so it can't be found anymore) 505 6741 th199096 * 2) Verify that there's no outstanding CB traffic. If so, return err. 
506 6741 th199096 * 3) Eventually the session will be reaped by the reaper_thread 507 6741 th199096 */ 508 6741 th199096 nfsstat4 509 10448 Thomas mds_destroysession(mds_session_t *sp) 510 6741 th199096 { 511 7397 rick nfsstat4 cbs; 512 6741 th199096 513 10448 Thomas rfs4_dbe_lock(sp->sn_dbe); 514 10448 Thomas cbs = mds_session_inval(sp); 515 10448 Thomas rfs4_dbe_unlock(sp->sn_dbe); 516 6741 th199096 517 6741 th199096 /* 518 9214 rick * The reference/hold maintained from the session to the client 519 9214 rick * struct gets nuked when the DB calls rfs4_dbe_destroy, which 520 9214 rick * in turn calls mds_session_destroy. 521 6741 th199096 */ 522 7813 rick if (cbs == NFS4_OK) 523 10448 Thomas rfs41_session_rele(sp); 524 7813 rick 525 7397 rick return (cbs); 526 6741 th199096 } 527 6741 th199096 528 6741 th199096 sn_chan_dir_t 529 6741 th199096 pd2cd(channel_dir_from_server4 dir) 530 6741 th199096 { 531 6741 th199096 switch (dir) { 532 6741 th199096 case CDFS4_FORE: 533 6741 th199096 return (SN_CHAN_FORE); 534 6741 th199096 535 6741 th199096 case CDFS4_BACK: 536 6741 th199096 return (SN_CHAN_BACK); 537 6741 th199096 538 6741 th199096 case CDFS4_BOTH: 539 6741 th199096 default: 540 6741 th199096 return (SN_CHAN_BOTH); 541 6741 th199096 } 542 6741 th199096 /* NOTREACHED */ 543 7397 rick } 544 7397 rick 545 7397 rick /* 546 7397 rick * Delegation CB race detection support 547 7397 rick */ 548 7397 rick void 549 7397 rick rfs41_deleg_rs_hold(rfs4_deleg_state_t *dsp) 550 7397 rick { 551 10447 Thomas atomic_add_32(&dsp->rds_rs.refcnt, 1); 552 7397 rick } 553 7397 rick 554 7397 rick void 555 7397 rick rfs41_deleg_rs_rele(rfs4_deleg_state_t *dsp) 556 7397 rick { 557 10447 Thomas ASSERT(dsp->rds_rs.refcnt > 0); 558 10447 Thomas atomic_add_32(&dsp->rds_rs.refcnt, -1); 559 10447 Thomas if (dsp->rds_rs.refcnt == 0) { 560 10447 Thomas bzero(dsp->rds_rs.sessid, sizeof (sessionid4)); 561 10447 Thomas dsp->rds_rs.seqid = dsp->rds_rs.slotno = 0; 562 7397 rick } 563 7397 rick } 564 
7397 rick 565 7397 rick void 566 7397 rick rfs41_seq4_hold(void *data, uint32_t flag) 567 7397 rick { 568 7397 rick bit_attr_t *p = (bit_attr_t *)data; 569 7397 rick uint32_t idx = log2(flag); 570 7397 rick 571 7397 rick ASSERT(p[idx].ba_bit == flag); 572 7397 rick atomic_add_32(&p[idx].ba_refcnt, 1); 573 7397 rick p[idx].ba_trigger = gethrestime_sec(); 574 7397 rick } 575 7397 rick 576 7397 rick void 577 7397 rick rfs41_seq4_rele(void *data, uint32_t flag) 578 7397 rick { 579 7397 rick bit_attr_t *p = (bit_attr_t *)data; 580 7397 rick uint32_t idx = log2(flag); 581 7397 rick 582 7397 rick ASSERT(p[idx].ba_bit == flag); 583 7397 rick if (p[idx].ba_refcnt > 0) 584 7397 rick atomic_add_32(&p[idx].ba_refcnt, -1); 585 7397 rick p[idx].ba_trigger = gethrestime_sec(); 586 6741 th199096 } 587 6741 th199096 588 6741 th199096 sess_channel_t * 589 6741 th199096 rfs41_create_session_channel(channel_dir_from_server4 dir) 590 6741 th199096 { 591 6741 th199096 sess_channel_t *cp; 592 6741 th199096 sess_bcsd_t *bp; 593 6741 th199096 594 6741 th199096 cp = (sess_channel_t *)kmem_zalloc(sizeof (sess_channel_t), KM_SLEEP); 595 6741 th199096 rw_init(&cp->cn_lock, NULL, RW_DEFAULT, NULL); 596 6741 th199096 597 6741 th199096 switch (dir) { 598 6741 th199096 case CDFS4_FORE: 599 6741 th199096 break; 600 6741 th199096 601 6741 th199096 case CDFS4_BOTH: 602 6741 th199096 case CDFS4_BACK: 603 6741 th199096 /* BackChan Specific Data */ 604 6741 th199096 bp = (sess_bcsd_t *)kmem_zalloc(sizeof (sess_bcsd_t), KM_SLEEP); 605 7397 rick rw_init(&bp->bsd_rwlock, NULL, RW_DEFAULT, NULL); 606 6741 th199096 cp->cn_csd = (sess_bcsd_t *)bp; 607 6741 th199096 break; 608 6741 th199096 } 609 6741 th199096 return (cp); 610 6741 th199096 } 611 6741 th199096 612 6741 th199096 void 613 7813 rick rfs41_destroy_session_channel(mds_session_t *sp, channel_dir_from_server4 dir) 614 6741 th199096 { 615 7813 rick sess_channel_t *cp; 616 6741 th199096 sess_bcsd_t *bp; 617 6741 th199096 618 7813 rick if (sp == NULL) 
619 7813 rick return; 620 7813 rick if (dir == CDFS4_FORE && sp->sn_fore == NULL) 621 7813 rick return; 622 7813 rick if (dir == CDFS4_BACK && sp->sn_back == NULL) 623 6741 th199096 return; 624 6741 th199096 625 7813 rick if (sp->sn_bdrpc) { 626 7813 rick ASSERT(sp->sn_fore == sp->sn_back); 627 7813 rick sp->sn_fore = NULL; 628 7813 rick goto back; 629 7813 rick } 630 6741 th199096 631 7813 rick if (dir == CDFS4_FORE || dir == CDFS4_BOTH) { 632 7813 rick fore: 633 7813 rick if (sp->sn_fore == NULL) 634 7813 rick return; 635 7813 rick cp = sp->sn_fore; 636 7813 rick 637 7813 rick rw_destroy(&cp->cn_lock); 638 7813 rick kmem_free(cp, sizeof (sess_channel_t)); 639 7813 rick sp->sn_fore = NULL; 640 7813 rick } 641 7813 rick 642 7813 rick if (dir == CDFS4_BACK || dir == CDFS4_BOTH) { 643 7813 rick back: 644 7813 rick if (sp->sn_back == NULL) 645 7813 rick return; 646 7813 rick cp = sp->sn_back; 647 7813 rick 648 6741 th199096 bp = (sess_bcsd_t *)cp->cn_csd; 649 7397 rick rw_destroy(&bp->bsd_rwlock); 650 6741 th199096 kmem_free(bp, sizeof (sess_bcsd_t)); 651 7813 rick 652 7813 rick rw_destroy(&cp->cn_lock); 653 7813 rick kmem_free(cp, sizeof (sess_channel_t)); 654 7813 rick sp->sn_back = NULL; 655 6741 th199096 } 656 6741 th199096 } 657 6741 th199096 658 6741 th199096 /* 659 6741 th199096 * Create/Initialize the session for this rfs4_client_t. Also 660 6741 th199096 * create its slot replay cache as per the server's resource 661 6741 th199096 * constraints. 
662 6741 th199096 */ 663 6741 th199096 /* ARGSUSED */ 664 6741 th199096 static bool_t 665 10016 Thomas mds_session_create(rfs4_entry_t u_entry, void *arg) 666 6741 th199096 { 667 10448 Thomas mds_session_t *sp = (mds_session_t *)u_entry; 668 6741 th199096 session41_create_t *ap = (session41_create_t *)arg; 669 6741 th199096 sess_channel_t *ocp = NULL; 670 6741 th199096 sid *sidp; 671 6741 th199096 SVCMASTERXPRT *mxprt; 672 6741 th199096 uint32_t i; 673 6741 th199096 int bdrpc; 674 6741 th199096 rpcprog_t prog; 675 6741 th199096 channel_dir_from_server4 dir; 676 6741 th199096 sess_bcsd_t *bsdp; 677 10475 rick nfs_server_instance_t *instp; 678 10475 rick int max_slots; 679 10475 rick nfsstat4 sle; 680 10475 rick struct svc_req *req; 681 6741 th199096 682 10448 Thomas ASSERT(sp != NULL); 683 10448 Thomas if (sp == NULL) 684 6741 th199096 return (FALSE); 685 7739 jwahlig 686 10448 Thomas instp = dbe_to_instp(sp->sn_dbe); 687 6741 th199096 688 6741 th199096 /* 689 9214 rick * Back pointer/ref to parent data struct (rfs4_client_t) 690 6741 th199096 */ 691 10448 Thomas sp->sn_clnt = (rfs4_client_t *)ap->cs_client; 692 10448 Thomas rfs4_dbe_hold(sp->sn_clnt->rc_dbe); 693 10475 rick req = (struct svc_req *)ap->cs_req; 694 10475 rick mxprt = (SVCMASTERXPRT *)req->rq_xprt->xp_master; 695 6741 th199096 696 6741 th199096 /* 697 6741 th199096 * Handcrafting the session id 698 6741 th199096 */ 699 10448 Thomas sidp = (sid *)&sp->sn_sessid; 700 6741 th199096 sidp->impl_id.pad0 = 0x00000000; 701 6741 th199096 sidp->impl_id.pad1 = 0xFFFFFFFF; 702 7739 jwahlig sidp->impl_id.start_time = instp->start_time; 703 10448 Thomas sidp->impl_id.s_id = (uint32_t)rfs4_dbe_getid(sp->sn_dbe); 704 6741 th199096 705 6741 th199096 /* 706 6741 th199096 * Process csa_flags; note that CREATE_SESSION4_FLAG_CONN_BACK_CHAN 707 6741 th199096 * is processed below since it affects direction and setup of the 708 6741 th199096 * backchannel accordingly. 
709 6741 th199096 */ 710 10448 Thomas sp->sn_csflags = 0; 711 6741 th199096 if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_PERSIST) 712 6741 th199096 /* XXX - Worry about persistence later */ 713 10448 Thomas sp->sn_csflags &= ~CREATE_SESSION4_FLAG_PERSIST; 714 6741 th199096 715 6741 th199096 if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_RDMA) 716 6741 th199096 /* XXX - No RDMA for now */ 717 10448 Thomas sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_RDMA; 718 6741 th199096 719 6741 th199096 /* 720 6741 th199096 * Initialize some overall sessions values 721 6741 th199096 */ 722 10448 Thomas sp->sn_bc.progno = ap->cs_aotw.csa_cb_program; 723 10448 Thomas sp->sn_laccess = gethrestime_sec(); 724 10448 Thomas sp->sn_flags = 0; 725 6741 th199096 726 6741 th199096 /* 727 6741 th199096 * Check if client has specified that the FORE channel should 728 6741 th199096 * also be used for call back traffic (ie. bidir RPC). If so, 729 6741 th199096 * let's try to accomodate the request. 730 6741 th199096 */ 731 6741 th199096 DTRACE_PROBE1(csa__flags, uint32_t, ap->cs_aotw.csa_flags); 732 6741 th199096 bdrpc = ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 733 6741 th199096 734 6741 th199096 if (bdrpc) { 735 6741 th199096 SVCCB_ARGS cbargs; 736 10448 Thomas prog = sp->sn_bc.progno; 737 6741 th199096 cbargs.xprt = mxprt; 738 6741 th199096 cbargs.prog = prog; 739 6741 th199096 cbargs.vers = NFS_CB; 740 6741 th199096 cbargs.family = AF_INET; 741 10448 Thomas cbargs.tag = (void *)sp->sn_sessid; 742 6741 th199096 743 10475 rick if (SVC_CTL(req->rq_xprt, SVCCTL_SET_CBCONN, (void *)&cbargs)) { 744 6741 th199096 /* 745 6741 th199096 * Couldn't create a bi-dir RPC connection. Reset 746 6741 th199096 * bdrpc so that the session's channel flags are 747 6741 th199096 * set appropriately and the client knows it needs 748 6741 th199096 * to do the BIND_CONN_TO_SESSION dance in order 749 6741 th199096 * to establish a callback path. 
750 6741 th199096 */ 751 6741 th199096 bdrpc = 0; 752 6741 th199096 } 753 6741 th199096 } 754 6741 th199096 755 6741 th199096 /* 756 6741 th199096 * Session's channel flags depending on bdrpc 757 6741 th199096 */ 758 10448 Thomas sp->sn_bdrpc = bdrpc; 759 10448 Thomas dir = sp->sn_bdrpc ? (CDFS4_FORE | CDFS4_BACK) : CDFS4_FORE; 760 6741 th199096 ocp = rfs41_create_session_channel(dir); 761 6741 th199096 ocp->cn_dir = dir; 762 10448 Thomas sp->sn_fore = ocp; 763 6741 th199096 764 6741 th199096 /* 765 10475 rick * Check if channel attrs will be flexible enough for future 766 10475 rick * purposes. Channel attribute enforcement is done as part of 767 10475 rick * COMPOUND processing. 768 6741 th199096 */ 769 6741 th199096 ocp->cn_attrs = ap->cs_aotw.csa_fore_chan_attrs; 770 10475 rick if (sle = sess_chan_limits(ocp)) { 771 10475 rick ap->cs_error = sle; 772 10475 rick return (FALSE); 773 10475 rick } 774 6741 th199096 775 6741 th199096 /* 776 6741 th199096 * No need for locks/synchronization at this time, 777 6741 th199096 * since we're barely creating the session. 778 6741 th199096 */ 779 10448 Thomas if (sp->sn_bdrpc) { 780 6741 th199096 /* 781 6741 th199096 * bcsd got built as part of the channel's construction. 782 6741 th199096 */ 783 6741 th199096 if ((bsdp = CTOBSD(ocp)) == NULL) { 784 6741 th199096 cmn_err(CE_PANIC, "Back Chan Spec Data Not Set\t" 785 6741 th199096 "<Internal Inconsistency>"); 786 6741 th199096 } 787 10472 rick bc_slot_tab = ap->cs_aotw.csa_back_chan_attrs.ca_maxrequests; 788 10467 rick slrc_table_create(&bsdp->bsd_stok, bc_slot_tab); 789 10448 Thomas sp->sn_csflags |= CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 790 10448 Thomas sp->sn_back = ocp; 791 6741 th199096 792 6741 th199096 } else { 793 6741 th199096 /* 794 6741 th199096 * If not doing bdrpc, then we expect the client to perform 795 6741 th199096 * an explicit BIND_CONN_TO_SESSION if it wants callback 796 6741 th199096 * traffic. 
Subsequently, the cb channel should be set up 797 9394 P * at that point along with its corresponding slot (see 798 7397 rick * rfs41_bc_setup). 799 6741 th199096 */ 800 10448 Thomas sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_BACK_CHAN; 801 10448 Thomas sp->sn_back = NULL; 802 6741 th199096 prog = 0; 803 7397 rick 804 7397 rick /* 805 7397 rick * XXX 08/15/2008 (rick) - if the channel is not bidir when 806 7397 rick * created in CREATE_SESSION, then we should save off 807 7397 rick * the ap->cs_aotw.csa_back_chan_attrs in case later 808 7397 rick * a bc2s is called to create the back channel. 809 7397 rick */ 810 6741 th199096 } 811 6741 th199096 812 6741 th199096 /* 813 6741 th199096 * We're just creating the session... there _shouldn't_ be any 814 6741 th199096 * other threads wanting to add connections to this sessions' 815 6741 th199096 * conn list, so we purposefully do _not_ take the ocp->cn_lock 816 6741 th199096 * 817 6741 th199096 * sn_bc fields are all initialized to 0 (via zalloc) 818 6741 th199096 */ 819 6741 th199096 820 10475 rick SVC_CTL(req->rq_xprt, SVCCTL_SET_TAG, (void *)sp->sn_sessid); 821 10448 Thomas 822 10448 Thomas if (sp->sn_bdrpc) { 823 10448 Thomas atomic_add_32(&sp->sn_bc.pngcnt, 1); 824 6741 th199096 } 825 6741 th199096 826 6741 th199096 /* 827 6741 th199096 * Now we allocate space for the slrc, initializing each slot's 828 6741 th199096 * sequenceid and slotid to zero and a (pre)cached result of 829 6741 th199096 * NFS4ERR_SEQ_MISORDERED. Note that we zero out the entries 830 6741 th199096 * by virtue of the z-alloc. 
831 6741 th199096 */ 832 9394 P max_slots = ocp->cn_attrs.ca_maxrequests; 833 10448 Thomas slrc_table_create(&sp->sn_replay, max_slots); 834 7397 rick 835 7397 rick /* only initialize bits relevant to session scope */ 836 10448 Thomas bzero(&sp->sn_seq4, sizeof (bit_attr_t) * BITS_PER_WORD); 837 7397 rick for (i = 1; i <= SEQ4_HIGH_BIT && i != 0; i <<= 1) { 838 7397 rick uint32_t idx = log2(i); 839 7397 rick 840 7397 rick switch (i) { 841 7397 rick case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING: 842 7397 rick case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED: 843 7397 rick case SEQ4_STATUS_CB_PATH_DOWN_SESSION: 844 7397 rick case SEQ4_STATUS_BACKCHANNEL_FAULT: 845 10448 Thomas sp->sn_seq4[idx].ba_bit = i; 846 7397 rick break; 847 7397 rick default: 848 7397 rick /* already bzero'ed */ 849 7397 rick break; 850 7397 rick } 851 7397 rick } 852 7397 rick 853 10448 Thomas if (sp->sn_bdrpc) { 854 7397 rick /* 855 7397 rick * Recall that for CB_PATH_DOWN[_SESSION], the refcnt 856 7397 rick * indicates the number of active back channel conns 857 7397 rick */ 858 10448 Thomas rfs41_seq4_hold(&sp->sn_seq4, SEQ4_STATUS_CB_PATH_DOWN_SESSION); 859 10448 Thomas rfs41_seq4_hold(&sp->sn_clnt->rc_seq4, 860 10447 Thomas SEQ4_STATUS_CB_PATH_DOWN); 861 6741 th199096 } 862 6741 th199096 return (TRUE); 863 6741 th199096 } 864 6741 th199096 865 6741 th199096 /* ARGSUSED */ 866 6741 th199096 static void 867 6741 th199096 mds_session_destroy(rfs4_entry_t u_entry) 868 6741 th199096 { 869 10448 Thomas mds_session_t *sp = (mds_session_t *)u_entry; 870 7397 rick sess_bcsd_t *bsdp; 871 7397 rick 872 10448 Thomas if (SN_CB_CHAN_EST(sp) && ((bsdp = CTOBSD(sp->sn_back)) != NULL)) 873 9394 P slrc_table_destroy(bsdp->bsd_stok); 874 6741 th199096 875 6741 th199096 /* 876 6741 th199096 * XXX - A session can have multiple BC clnt handles that need 877 6741 th199096 * to be discarded. 
mds_session_inval calls CLNT_DESTROY 878 6741 th199096 * which will remove the CB client handle from the global 879 6741 th199096 * list (cb_clnt_list) now. This will have to change once 880 6741 th199096 * we manage the BC clnt handles per session. 881 6741 th199096 */ 882 6741 th199096 883 6741 th199096 /* 884 7813 rick * Remove the fore and back channels. 885 6741 th199096 */ 886 10448 Thomas rfs41_destroy_session_channel(sp, CDFS4_BOTH); 887 6741 th199096 888 6741 th199096 /* 889 6741 th199096 * Nuke slot replay cache for this session 890 6741 th199096 */ 891 10448 Thomas if (sp->sn_replay) { 892 10448 Thomas slrc_table_destroy(sp->sn_replay); 893 10448 Thomas sp->sn_replay = NULL; 894 7813 rick } 895 9214 rick 896 9214 rick /* 897 9214 rick * Remove reference to parent data struct 898 9214 rick */ 899 10448 Thomas if (sp->sn_clnt) 900 10448 Thomas rfs4_client_rele(sp->sn_clnt); 901 6741 th199096 } 902 6741 th199096 903 6741 th199096 static bool_t 904 6741 th199096 mds_session_expiry(rfs4_entry_t u_entry) 905 6741 th199096 { 906 10448 Thomas mds_session_t *sp = (mds_session_t *)u_entry; 907 10448 Thomas 908 10448 Thomas if (sp == NULL || rfs4_dbe_is_invalid(sp->sn_dbe)) 909 9214 rick return (TRUE); 910 9214 rick 911 10448 Thomas if (rfs4_lease_expired(sp->sn_clnt)) 912 6741 th199096 return (TRUE); 913 6741 th199096 914 6741 th199096 return (FALSE); 915 6741 th199096 } 916 6741 th199096 917 7739 jwahlig void 918 7739 jwahlig mds_kill_session_callout(rfs4_entry_t u_entry, void *arg) 919 7739 jwahlig { 920 7739 jwahlig rfs4_client_t *cp = (rfs4_client_t *)arg; 921 10448 Thomas mds_session_t *sp = (mds_session_t *)u_entry; 922 10448 Thomas 923 10448 Thomas if (sp->sn_clnt == cp && !(rfs4_dbe_is_invalid(sp->sn_dbe))) { 924 7813 rick /* 925 7813 rick * client is going away; so no need to check for 926 7813 rick * CB channel traffic before destroying a session. 
927 7813 rick */ 928 10448 Thomas rfs4_dbe_invalidate(sp->sn_dbe); 929 9404 Thomas } 930 7739 jwahlig } 931 7739 jwahlig 932 7739 jwahlig void 933 7739 jwahlig mds_clean_up_sessions(rfs4_client_t *cp) 934 7739 jwahlig { 935 7739 jwahlig nfs_server_instance_t *instp; 936 7739 jwahlig 937 10447 Thomas instp = dbe_to_instp(cp->rc_dbe); 938 7739 jwahlig 939 7739 jwahlig if (instp->mds_session_tab != NULL) 940 7739 jwahlig rfs4_dbe_walk(instp->mds_session_tab, 941 7739 jwahlig mds_kill_session_callout, cp); 942 7739 jwahlig } 943 7739 jwahlig 944 6741 th199096 /* 945 6741 th199096 * ----------------------------------------------- 946 6741 th199096 * MDS: Layout tables. 947 6741 th199096 * ----------------------------------------------- 948 6741 th199096 */ 949 6741 th199096 static uint32_t 950 6741 th199096 mds_layout_hash(void *key) 951 6741 th199096 { 952 10016 Thomas layout_core_t *lc = (layout_core_t *)key; 953 10016 Thomas int i; 954 10016 Thomas uint32_t hash = 0; 955 10016 Thomas 956 10016 Thomas if (lc->lc_stripe_count == 0) 957 10016 Thomas return (0); 958 10016 Thomas 959 10016 Thomas /* 960 10016 Thomas * Hash the first mds_sid 961 10016 Thomas */ 962 10016 Thomas for (i = 0; i < lc->lc_mds_sids[0].len; i++) { 963 10016 Thomas hash <<= 1; 964 10016 Thomas hash += (uint_t)lc->lc_mds_sids[0].val[i]; 965 10016 Thomas } 966 10016 Thomas 967 10016 Thomas return (hash); 968 6741 th199096 } 969 6741 th199096 970 6741 th199096 static bool_t 971 6741 th199096 mds_layout_compare(rfs4_entry_t entry, void *key) 972 6741 th199096 { 973 10016 Thomas mds_layout_t *lp = (mds_layout_t *)entry; 974 10016 Thomas layout_core_t *lc = (layout_core_t *)key; 975 10016 Thomas 976 10016 Thomas int i; 977 10016 Thomas 978 10016 Thomas if (lc->lc_stripe_unit == lp->mlo_lc.lc_stripe_unit) { 979 10016 Thomas if (lc->lc_stripe_count == 980 10016 Thomas lp->mlo_lc.lc_stripe_count) { 981 10016 Thomas for (i = 0; i < lc->lc_stripe_count; i++) { 982 10016 Thomas if (lc->lc_mds_sids[i].len != 
983 10016 Thomas lp->mlo_lc.lc_mds_sids[i].len) { 984 10016 Thomas return (0); 985 10016 Thomas } 986 10016 Thomas 987 10016 Thomas if (bcmp(lc->lc_mds_sids[i].val, 988 10016 Thomas lp->mlo_lc.lc_mds_sids[i].val, 989 10016 Thomas lc->lc_mds_sids[i].len)) { 990 10016 Thomas return (0); 991 10016 Thomas } 992 10016 Thomas } 993 10016 Thomas 994 10016 Thomas /* 995 10016 Thomas * Everything matches! 996 10016 Thomas */ 997 10016 Thomas return (1); 998 10016 Thomas } 999 10016 Thomas } 1000 10016 Thomas 1001 10016 Thomas return (0); 1002 10016 Thomas } 1003 10016 Thomas 1004 10016 Thomas static void * 1005 10016 Thomas mds_layout_mkkey(rfs4_entry_t entry) 1006 10016 Thomas { 1007 6741 th199096 mds_layout_t *lp = (mds_layout_t *)entry; 1008 6741 th199096 1009 10016 Thomas return ((void *)&lp->mlo_lc); 1010 10016 Thomas } 1011 10016 Thomas 1012 10016 Thomas static uint32_t 1013 10016 Thomas mds_layout_id_hash(void *key) 1014 10016 Thomas { 1015 10016 Thomas return ((uint32_t)(uintptr_t)key); 1016 10016 Thomas } 1017 10016 Thomas 1018 10016 Thomas static bool_t 1019 10016 Thomas mds_layout_id_compare(rfs4_entry_t entry, void *key) 1020 6741 th199096 { 1021 6741 th199096 mds_layout_t *lp = (mds_layout_t *)entry; 1022 6741 th199096 1023 10016 Thomas return (lp->mlo_id == (int)(uintptr_t)key); 1024 10016 Thomas } 1025 10016 Thomas 1026 10016 Thomas static void * 1027 10016 Thomas mds_layout_id_mkkey(rfs4_entry_t entry) 1028 10016 Thomas { 1029 10016 Thomas mds_layout_t *lp = (mds_layout_t *)entry; 1030 10016 Thomas 1031 10016 Thomas return ((void *)(uintptr_t)lp->mlo_id); 1032 10016 Thomas } 1033 10016 Thomas 1034 10016 Thomas typedef struct { 1035 10016 Thomas uint32_t id; 1036 10016 Thomas nfsv4_1_file_layout_ds_addr4 *ds_addr4; 1037 10016 Thomas } mds_addmpd_t; 1038 10016 Thomas 1039 10016 Thomas /* 1040 10016 Thomas * ================================================================ 1041 10016 Thomas * XXX: Both mds_gather_mds_sids and mds_gen_default_layout 1042 10016 
Thomas * have been left in to support installations with no 1043 10016 Thomas * policies defined. In short, we do not force people to 1044 10016 Thomas * set up a policy system. Whenever the SMF portion of the 1045 10016 Thomas * code comes along, we will nuke these functions and 1046 10016 Thomas * force a real default to exist. 1047 10016 Thomas * ================================================================ 1048 10016 Thomas */ 1049 6741 th199096 1050 6741 th199096 struct mds_gather_args { 1051 10016 Thomas layout_core_t lc; 1052 10016 Thomas int found; 1053 6741 th199096 }; 1054 6741 th199096 1055 10016 Thomas static void 1056 10016 Thomas mds_gather_mds_sids(rfs4_entry_t entry, void *arg) 1057 10016 Thomas { 1058 10016 Thomas ds_guid_info_t *pgi = (ds_guid_info_t *)entry; 1059 10016 Thomas struct mds_gather_args *gap = (struct mds_gather_args *)arg; 1060 6741 th199096 1061 9407 Thomas int i, j; 1062 9407 Thomas 1063 10016 Thomas if (rfs4_dbe_skip_or_invalid(pgi->dbe)) 1064 10016 Thomas return; 1065 10016 Thomas 1066 10016 Thomas if (gap->found < gap->lc.lc_stripe_count) { 1067 9407 Thomas /* 1068 9407 Thomas * Insert in order. 
1069 9407 Thomas */ 1070 10016 Thomas for (i = 0; i < gap->found; i++) { 1071 10016 Thomas if ((pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len < 1072 10016 Thomas gap->lc.lc_mds_sids[i].len) || 1073 10016 Thomas (pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len == 1074 10016 Thomas gap->lc.lc_mds_sids[i].len && 1075 10016 Thomas bcmp(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val, 1076 10016 Thomas gap->lc.lc_mds_sids[i].val, 1077 10016 Thomas gap->lc.lc_mds_sids[i].len) < 0)) { 1078 10016 Thomas for (j = gap->found; j > i; j--) { 1079 10016 Thomas gap->lc.lc_mds_sids[j].len = 1080 10016 Thomas gap->lc.lc_mds_sids[j - 1].len; 1081 10016 Thomas gap->lc.lc_mds_sids[j - 1].val = 1082 10016 Thomas gap->lc.lc_mds_sids[j].val; 1083 10016 Thomas } 1084 9407 Thomas 1085 9407 Thomas break; 1086 9407 Thomas } 1087 9407 Thomas } 1088 9407 Thomas 1089 9407 Thomas /* 1090 10016 Thomas * Either we found it and i is where it goes or we didn't 1091 10016 Thomas * find it and i is the tail. Either way, same thing happens! 
1092 10016 Thomas */ 1093 10016 Thomas gap->lc.lc_mds_sids[i].len = 1094 10016 Thomas pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len; 1095 10016 Thomas gap->lc.lc_mds_sids[i].val = 1096 10016 Thomas kmem_alloc(gap->lc.lc_mds_sids[i].len, KM_SLEEP); 1097 10016 Thomas bcopy(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val, 1098 10016 Thomas gap->lc.lc_mds_sids[i].val, 1099 10016 Thomas gap->lc.lc_mds_sids[i].len); 1100 10016 Thomas 1101 10016 Thomas gap->found++; 1102 10016 Thomas } 1103 10016 Thomas } 1104 10016 Thomas 1105 10016 Thomas int mds_default_stripe = 32; 1106 10016 Thomas 1107 10016 Thomas mds_layout_t * 1108 10016 Thomas mds_gen_default_layout(nfs_server_instance_t *instp) 1109 10016 Thomas { 1110 10016 Thomas struct mds_gather_args gap; 1111 10016 Thomas mds_layout_t *lp; 1112 10016 Thomas 1113 10016 Thomas int i; 1114 10016 Thomas 1115 10016 Thomas bzero(&gap, sizeof (gap)); 1116 10016 Thomas 1117 10016 Thomas gap.found = 0; 1118 10016 Thomas 1119 10016 Thomas rw_enter(&instp->ds_guid_info_lock, RW_READER); 1120 10016 Thomas gap.lc.lc_stripe_count = instp->ds_guid_info_count; 1121 10016 Thomas rw_exit(&instp->ds_guid_info_lock); 1122 10016 Thomas 1123 10016 Thomas gap.lc.lc_mds_sids = kmem_zalloc(gap.lc.lc_stripe_count * 1124 10016 Thomas sizeof (mds_sid), KM_SLEEP); 1125 10016 Thomas 1126 10016 Thomas rw_enter(&instp->ds_guid_info_lock, RW_READER); 1127 10016 Thomas rfs4_dbe_walk(instp->ds_guid_info_tab, mds_gather_mds_sids, &gap); 1128 10016 Thomas rw_exit(&instp->ds_guid_info_lock); 1129 10016 Thomas 1130 10016 Thomas /* 1131 10016 Thomas * If we didn't find any devices then we do no service 1132 10016 Thomas */ 1133 10016 Thomas if (gap.found == 0) { 1134 10016 Thomas kmem_free(gap.lc.lc_mds_sids, gap.lc.lc_stripe_count * 1135 10016 Thomas sizeof (mds_sid)); 1136 10016 Thomas return (NULL); 1137 10016 Thomas } 1138 10016 Thomas 1139 10016 Thomas /* 1140 10016 Thomas * XXX: What if found != stripe_count ? 
1141 10016 Thomas */ 1142 10016 Thomas 1143 10016 Thomas gap.lc.lc_stripe_unit = mds_default_stripe * 1024; 1144 10016 Thomas 1145 10016 Thomas rw_enter(&instp->mds_layout_lock, RW_WRITER); 1146 10016 Thomas lp = (mds_layout_t *)rfs4_dbcreate(instp->mds_layout_idx, 1147 10016 Thomas (void *)&gap.lc); 1148 10016 Thomas if (lp) { 1149 10016 Thomas instp->mds_layout_default_idx = lp->mlo_id; 1150 10016 Thomas } 1151 10016 Thomas rw_exit(&instp->mds_layout_lock); 1152 10016 Thomas 1153 10016 Thomas for (i = 0; i < gap.lc.lc_stripe_count; i++) { 1154 10016 Thomas kmem_free(gap.lc.lc_mds_sids[i].val, 1155 10016 Thomas gap.lc.lc_mds_sids[i].len); 1156 10016 Thomas } 1157 10016 Thomas 1158 10016 Thomas kmem_free(gap.lc.lc_mds_sids, gap.lc.lc_stripe_count * 1159 10016 Thomas sizeof (mds_sid)); 1160 10016 Thomas return (lp); 1161 10016 Thomas } 1162 10016 Thomas 1163 10016 Thomas /* ================================================================ */ 1164 10016 Thomas 1165 10016 Thomas 1166 10016 Thomas /* 1167 10016 Thomas * Given a layout, which now is comprised of mds_dataset_ids, instead of 1168 10016 Thomas * devices, generate the list of devices... 1169 10016 Thomas */ 1170 10016 Thomas static mds_mpd_t * 1171 10016 Thomas mds_gen_mpd(nfs_server_instance_t *instp, mds_layout_t *lp) 1172 10016 Thomas { 1173 10016 Thomas nfsv4_1_file_layout_ds_addr4 ds_dev; 1174 6741 th199096 1175 9404 Thomas /* 1176 9404 Thomas * The key to understanding the way these data structures 1177 9404 Thomas * interact is that map points to ds_dev. And map is stuck 1178 9404 Thomas * into the mds_mpd_idx database. 
1179 9404 Thomas */ 1180 10016 Thomas mds_addmpd_t map = { .id = 0, .ds_addr4 = &ds_dev }; 1181 10016 Thomas mds_mpd_t *mp = NULL; 1182 10016 Thomas uint_t len; 1183 10016 Thomas int i, iLoaded = 0; 1184 10016 Thomas uint32_t *sivp; 1185 10016 Thomas multipath_list4 *mplp; 1186 10016 Thomas 1187 10016 Thomas ds_addrlist_t **adp = NULL; 1188 10016 Thomas 1189 10016 Thomas ASSERT(instp->mds_mpd_id_space != NULL); 1190 10016 Thomas map.id = id_alloc(instp->mds_mpd_id_space); 1191 6741 th199096 1192 6741 th199096 /* 1193 6741 th199096 * build a nfsv4_1_file_layout_ds_addr4, encode it and 1194 6741 th199096 * cache it in state_store. 1195 6741 th199096 */ 1196 10016 Thomas len = lp->mlo_lc.lc_stripe_count; 1197 6741 th199096 1198 6741 th199096 /* allocate space for the indices */ 1199 6741 th199096 sivp = ds_dev.nflda_stripe_indices.nflda_stripe_indices_val = 1200 6741 th199096 kmem_zalloc(len * sizeof (uint32_t), KM_SLEEP); 1201 6741 th199096 1202 6741 th199096 ds_dev.nflda_stripe_indices.nflda_stripe_indices_len = len; 1203 6741 th199096 1204 6741 th199096 /* populate the stripe indices */ 1205 10016 Thomas for (i = 0; i < len; i++) 1206 10016 Thomas sivp[i] = i; 1207 6741 th199096 1208 6741 th199096 /* 1209 6741 th199096 * allocate space for the multipath_list4 (for now we just 1210 6741 th199096 * have the one path) 1211 6741 th199096 */ 1212 6741 th199096 mplp = ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_val = 1213 6741 th199096 kmem_zalloc(len * sizeof (multipath_list4), KM_SLEEP); 1214 6741 th199096 1215 6741 th199096 ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_len = len; 1216 6741 th199096 1217 10016 Thomas adp = kmem_zalloc(len * sizeof (ds_addrlist_t *), KM_SLEEP); 1218 10016 Thomas 1219 6741 th199096 /* 1220 7739 jwahlig * Now populate the netaddrs using the stashed ds_addr 1221 6741 th199096 * pointers 1222 6741 th199096 */ 1223 10016 Thomas for (i = 0; i < len; i++) { 1224 10016 Thomas ds_addrlist_t *dp; 1225 10016 Thomas 1226 10016 
Thomas mplp[i].multipath_list4_len = 1; 1227 10016 Thomas dp = mds_find_ds_addrlist_by_mds_sid(instp, 1228 10016 Thomas &lp->mlo_lc.lc_mds_sids[i]); 1229 10016 Thomas if (!dp) { 1230 10016 Thomas iLoaded = i; 1231 10016 Thomas goto cleanup; 1232 10016 Thomas } 1233 10016 Thomas 1234 10016 Thomas mplp[i].multipath_list4_val = &dp->dev_addr; 1235 10016 Thomas adp[i] = dp; 1236 10016 Thomas } 1237 10016 Thomas 1238 10016 Thomas iLoaded = len; 1239 6741 th199096 1240 6741 th199096 /* 1241 6741 th199096 * Add the multipath_list4, this will encode and cache 1242 6741 th199096 * the result. 1243 6741 th199096 */ 1244 7739 jwahlig rw_enter(&instp->mds_mpd_lock, RW_WRITER); 1245 10016 Thomas 1246 10016 Thomas /* 1247 10016 Thomas * XXX: Each layout has its own mpd. 1248 10016 Thomas * 1249 10016 Thomas * Note that we should fix this.... 1250 10016 Thomas */ 1251 7739 jwahlig mp = (mds_mpd_t *)rfs4_dbcreate(instp->mds_mpd_idx, (void *)&map); 1252 10016 Thomas if (mp) { 1253 10016 Thomas lp->mlo_mpd_id = mp->mpd_id; 1254 10016 Thomas 1255 10016 Thomas /* 1256 10016 Thomas * Put the layout on the layouts list. 1257 10016 Thomas * Note that we don't decrement the refcnt 1258 10016 Thomas * here, we keep a hold on it for inserting 1259 10016 Thomas * this layout on it. 
1260 10016 Thomas */ 1261 10016 Thomas list_insert_tail(&mp->mpd_layouts_list, lp); 1262 10016 Thomas } 1263 10016 Thomas 1264 7739 jwahlig rw_exit(&instp->mds_mpd_lock); 1265 6741 th199096 1266 10016 Thomas cleanup: 1267 10016 Thomas 1268 10016 Thomas for (i = 0; i < iLoaded; i++) { 1269 10016 Thomas rfs4_dbe_rele(adp[i]->dbe); 1270 10016 Thomas } 1271 10016 Thomas 1272 10016 Thomas kmem_free(adp, len * sizeof (ds_addrlist_t *)); 1273 6741 th199096 kmem_free(mplp, len * sizeof (multipath_list4)); 1274 6741 th199096 kmem_free(sivp, len * sizeof (uint32_t)); 1275 10016 Thomas 1276 10016 Thomas if (mp == NULL) 1277 10016 Thomas id_free(instp->mds_mpd_id_space, map.id); 1278 10016 Thomas 1279 6741 th199096 return (mp); 1280 6741 th199096 } 1281 6741 th199096 1282 10016 Thomas void 1283 10016 Thomas mds_nuke_layout(nfs_server_instance_t *instp, uint32_t mlo_id) 1284 6741 th199096 { 1285 6741 th199096 bool_t create = FALSE; 1286 6741 th199096 rfs4_entry_t e; 1287 6741 th199096 1288 7739 jwahlig rw_enter(&instp->mds_layout_lock, RW_WRITER); 1289 10016 Thomas if ((e = rfs4_dbsearch(instp->mds_layout_ID_idx, 1290 10016 Thomas (void *)(uintptr_t)mlo_id, 1291 7739 jwahlig &create, 1292 7739 jwahlig NULL, 1293 7739 jwahlig RFS4_DBS_VALID)) != NULL) { 1294 6741 th199096 rfs4_dbe_invalidate(e->dbe); 1295 9404 Thomas rfs4_dbe_rele(e->dbe); 1296 6741 th199096 } 1297 7739 jwahlig rw_exit(&instp->mds_layout_lock); 1298 6741 th199096 } 1299 6741 th199096 1300 6741 th199096 /*ARGSUSED*/ 1301 6741 th199096 static bool_t 1302 7739 jwahlig mds_layout_create(rfs4_entry_t u_entry, void *arg) 1303 6741 th199096 { 1304 10016 Thomas mds_layout_t *lp = (mds_layout_t *)u_entry; 1305 10016 Thomas layout_core_t *lc = (layout_core_t *)arg; 1306 10016 Thomas 1307 10016 Thomas nfs_server_instance_t *instp; 1308 10016 Thomas int i; 1309 9404 Thomas bool_t rc = TRUE; 1310 6741 th199096 1311 10447 Thomas instp = dbe_to_instp(lp->mlo_dbe); 1312 10447 Thomas 1313 10447 Thomas lp->mlo_id = 
rfs4_dbe_getid(lp->mlo_dbe); 1314 10016 Thomas 1315 10016 Thomas lp->mlo_type = LAYOUT4_NFSV4_1_FILES; 1316 10016 Thomas lp->mlo_lc.lc_stripe_unit = lc->lc_stripe_unit; 1317 10016 Thomas lp->mlo_lc.lc_stripe_count = lc->lc_stripe_count; 1318 10016 Thomas 1319 10016 Thomas lp->mlo_lc.lc_mds_sids = kmem_zalloc(lp->mlo_lc.lc_stripe_count * 1320 10016 Thomas sizeof (mds_sid), KM_SLEEP); 1321 10016 Thomas 1322 10016 Thomas for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) { 1323 10016 Thomas lp->mlo_lc.lc_mds_sids[i].len = lc->lc_mds_sids[i].len; 1324 10016 Thomas lp->mlo_lc.lc_mds_sids[i].val = 1325 10016 Thomas kmem_alloc(lp->mlo_lc.lc_mds_sids[i].len, KM_SLEEP); 1326 10016 Thomas bcopy(lc->lc_mds_sids[i].val, lp->mlo_lc.lc_mds_sids[i].val, 1327 10016 Thomas lp->mlo_lc.lc_mds_sids[i].len); 1328 10016 Thomas } 1329 6741 th199096 1330 6741 th199096 /* Need to generate a device for this layout */ 1331 10016 Thomas lp->mlo_mpd = mds_gen_mpd(instp, lp); 1332 10016 Thomas if (lp->mlo_mpd == NULL) { 1333 10016 Thomas for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) { 1334 10016 Thomas kmem_free(lp->mlo_lc.lc_mds_sids[i].val, 1335 10016 Thomas lp->mlo_lc.lc_mds_sids[i].len); 1336 10016 Thomas } 1337 10016 Thomas 1338 10016 Thomas kmem_free(lp->mlo_lc.lc_mds_sids, lp->mlo_lc.lc_stripe_count * 1339 10016 Thomas sizeof (mds_sid)); 1340 10016 Thomas lp->mlo_lc.lc_mds_sids = NULL; 1341 10016 Thomas rc = FALSE; 1342 9404 Thomas } 1343 9404 Thomas 1344 9404 Thomas return (rc); 1345 6741 th199096 } 1346 6741 th199096 1347 6741 th199096 /*ARGSUSED*/ 1348 6741 th199096 static void 1349 9404 Thomas mds_layout_destroy(rfs4_entry_t u_entry) 1350 6741 th199096 { 1351 10016 Thomas mds_layout_t *lp = (mds_layout_t *)u_entry; 1352 10016 Thomas nfs_server_instance_t *instp; 1353 10016 Thomas int i; 1354 10016 Thomas 1355 10016 Thomas instp = dbe_to_instp(u_entry->dbe); 1356 10016 Thomas 1357 10016 Thomas rw_enter(&instp->mds_mpd_lock, RW_WRITER); 1358 10016 Thomas if (lp->mlo_mpd != NULL) { 
1359 10016 Thomas list_remove(&lp->mlo_mpd->mpd_layouts_list, lp); 1360 10447 Thomas rfs4_dbe_rele(lp->mlo_mpd->mpd_dbe); 1361 10016 Thomas lp->mlo_mpd = NULL; 1362 10016 Thomas } 1363 10016 Thomas rw_exit(&instp->mds_mpd_lock); 1364 10016 Thomas 1365 10016 Thomas if (lp->mlo_lc.lc_mds_sids != NULL) { 1366 10016 Thomas for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) { 1367 10016 Thomas kmem_free(lp->mlo_lc.lc_mds_sids[i].val, 1368 10016 Thomas lp->mlo_lc.lc_mds_sids[i].len); 1369 10016 Thomas } 1370 10016 Thomas 1371 10016 Thomas kmem_free(lp->mlo_lc.lc_mds_sids, lp->mlo_lc.lc_stripe_count * 1372 10016 Thomas sizeof (mds_sid)); 1373 10016 Thomas lp->mlo_lc.lc_mds_sids = NULL; 1374 10016 Thomas } 1375 6741 th199096 } 1376 6741 th199096 1377 9215 James mds_layout_t * 1378 10016 Thomas mds_add_layout(layout_core_t *lc) 1379 6741 th199096 { 1380 6741 th199096 bool_t create = FALSE; 1381 10016 Thomas mds_layout_t *lp; 1382 6741 th199096 1383 7739 jwahlig rw_enter(&mds_server->mds_layout_lock, RW_WRITER); 1384 6741 th199096 1385 10016 Thomas /* 1386 10016 Thomas * If it is already in memory, then we can just 1387 10016 Thomas * bump the refcnt. 
1388 10016 Thomas */ 1389 10016 Thomas lp = (mds_layout_t *)rfs4_dbsearch(mds_server->mds_layout_idx, 1390 10016 Thomas (void *)lc, &create, NULL, 1391 10016 Thomas RFS4_DBS_VALID); 1392 10016 Thomas if (lp != NULL) { 1393 10016 Thomas rw_exit(&mds_server->mds_layout_lock); 1394 10016 Thomas return (lp); 1395 10016 Thomas } 1396 10016 Thomas 1397 10016 Thomas lp = (mds_layout_t *)rfs4_dbcreate(mds_server->mds_layout_idx, 1398 10016 Thomas (void *)lc); 1399 9215 James rw_exit(&mds_server->mds_layout_lock); 1400 10016 Thomas 1401 10016 Thomas if (lp == NULL) { 1402 6741 th199096 printf("mds_add_layout: failed\n"); 1403 6741 th199096 (void) set_errno(EFAULT); 1404 6741 th199096 } 1405 6741 th199096 1406 10016 Thomas return (lp); 1407 6741 th199096 } 1408 6741 th199096 1409 6741 th199096 #define ADDRHASH(key) ((unsigned long)(key) >> 3) 1410 6741 th199096 1411 6741 th199096 /* 1412 6741 th199096 * ----------------------------------------------- 1413 6741 th199096 * MDS: Layout Grant tables. 
1414 6741 th199096 * ----------------------------------------------- 1415 6741 th199096 * 1416 6741 th199096 */ 1417 6741 th199096 static uint32_t 1418 6741 th199096 mds_layout_grant_hash(void *key) 1419 6741 th199096 { 1420 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)key; 1421 10447 Thomas 1422 10447 Thomas return (ADDRHASH(lg->lo_cp) ^ ADDRHASH(lg->lo_fp)); 1423 6741 th199096 } 1424 6741 th199096 1425 6741 th199096 static bool_t 1426 7739 jwahlig mds_layout_grant_compare(rfs4_entry_t u_entry, void *key) 1427 6741 th199096 { 1428 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)u_entry; 1429 10447 Thomas mds_layout_grant_t *klg = (mds_layout_grant_t *)key; 1430 10447 Thomas 1431 10447 Thomas return (lg->lo_cp == klg->lo_cp && lg->lo_fp == klg->lo_fp); 1432 6741 th199096 } 1433 6741 th199096 1434 6741 th199096 static void * 1435 6741 th199096 mds_layout_grant_mkkey(rfs4_entry_t entry) 1436 6741 th199096 { 1437 6741 th199096 return (entry); 1438 6741 th199096 } 1439 6741 th199096 1440 8981 James #ifdef NOT_USED_NOW 1441 6741 th199096 static uint32_t 1442 7739 jwahlig mds_layout_grant_id_hash(void *key) 1443 6741 th199096 { 1444 7739 jwahlig stateid_t *id = (stateid_t *)key; 1445 7739 jwahlig 1446 7739 jwahlig return (id->v41_bits.state_ident); 1447 6741 th199096 } 1448 6741 th199096 1449 6741 th199096 static bool_t 1450 7739 jwahlig mds_layout_grant_id_compare(rfs4_entry_t entry, void *key) 1451 6741 th199096 { 1452 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)entry; 1453 7739 jwahlig stateid_t *id = (stateid_t *)key; 1454 7739 jwahlig bool_t rc; 1455 7739 jwahlig 1456 7739 jwahlig if (id->v41_bits.type != LAYOUTID) 1457 7739 jwahlig return (FALSE); 1458 7739 jwahlig 1459 10447 Thomas rc = (lg->lo_stateid.v41_bits.boottime == id->v41_bits.boottime && 1460 10447 Thomas lg->lo_stateid.v41_bits.state_ident == id->v41_bits.state_ident); 1461 7739 jwahlig 1462 7739 jwahlig return (rc); 1463 6741 th199096 } 1464 6741 th199096 1465 
6741 th199096 static void * 1466 7739 jwahlig mds_layout_grant_id_mkkey(rfs4_entry_t entry) 1467 6741 th199096 { 1468 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)entry; 1469 10447 Thomas 1470 10447 Thomas return (&lg->lo_stateid); 1471 6741 th199096 } 1472 8981 James #endif 1473 7739 jwahlig 1474 6741 th199096 /*ARGSUSED*/ 1475 6741 th199096 static bool_t 1476 7739 jwahlig mds_layout_grant_create(rfs4_entry_t u_entry, void *arg) 1477 6741 th199096 { 1478 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)u_entry; 1479 10447 Thomas rfs4_file_t *fp = ((mds_layout_grant_t *)arg)->lo_fp; 1480 10447 Thomas rfs4_client_t *cp = ((mds_layout_grant_t *)arg)->lo_cp; 1481 10447 Thomas 1482 10455 Thomas /* 1483 10455 Thomas * We hold onto the rfs4_file_t until we are done with it. 1484 10455 Thomas */ 1485 10447 Thomas rfs4_dbe_hold(fp->rf_dbe); 1486 10447 Thomas 1487 10447 Thomas lg->lo_status = LO_GRANTED; 1488 10447 Thomas lg->lo_stateid = mds_create_stateid(lg->lo_dbe, LAYOUTID); 1489 10447 Thomas lg->lo_fp = fp; 1490 10447 Thomas lg->lo_cp = cp; 1491 10447 Thomas lg->lor_seqid = lg->lor_reply = 0; 1492 10447 Thomas mutex_init(&lg->lo_lock, NULL, MUTEX_DEFAULT, NULL); 1493 7739 jwahlig 1494 7739 jwahlig /* Init layout grant lists for remque/insque */ 1495 10447 Thomas lg->lo_grant_list.next = lg->lo_grant_list.prev = 1496 10447 Thomas &lg->lo_grant_list; 1497 10447 Thomas lg->lo_grant_list.lg = lg; 1498 10447 Thomas 1499 10447 Thomas lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev = 1500 10447 Thomas &lg->lo_clientgrantlist; 1501 10447 Thomas lg->lo_clientgrantlist.lg = lg; 1502 10447 Thomas 1503 10447 Thomas lg->lo_range = nfs_range_create(); 1504 8427 James 1505 6741 th199096 return (TRUE); 1506 6741 th199096 } 1507 6741 th199096 1508 6741 th199096 /*ARGSUSED*/ 1509 6741 th199096 static void 1510 8041 rick mds_layout_grant_destroy(rfs4_entry_t entry) 1511 6741 th199096 { 1512 10447 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t 
*)entry; 1513 10455 Thomas 1514 10455 Thomas /* 1515 10455 Thomas * The code which invalidated this node should have 1516 10455 Thomas * gone ahead and released the rfs4_file_t. 1517 10455 Thomas */ 1518 10461 Thomas ASSERT(lg->lo_fp == NULL); 1519 10447 Thomas 1520 10447 Thomas mutex_destroy(&lg->lo_lock); 1521 10447 Thomas 1522 10447 Thomas nfs_range_destroy(lg->lo_range); 1523 10447 Thomas lg->lo_range = NULL; 1524 6741 th199096 } 1525 6741 th199096 1526 7739 jwahlig mds_layout_grant_t * 1527 7739 jwahlig rfs41_findlogrant(struct compound_state *cs, rfs4_file_t *fp, 1528 7739 jwahlig rfs4_client_t *cp, bool_t *create) 1529 7739 jwahlig { 1530 10447 Thomas mds_layout_grant_t args, *lg; 1531 10447 Thomas 1532 10447 Thomas args.lo_cp = cp; 1533 10447 Thomas args.lo_fp = fp; 1534 10447 Thomas 1535 10447 Thomas lg = (mds_layout_grant_t *)rfs4_dbsearch( 1536 10447 Thomas cs->instp->mds_layout_grant_idx, &args, create, 1537 10447 Thomas &args, RFS4_DBS_VALID); 1538 10447 Thomas 1539 10447 Thomas return (lg); 1540 10447 Thomas } 1541 10447 Thomas 1542 10447 Thomas void 1543 10447 Thomas rfs41_lo_grant_hold(mds_layout_grant_t *lg) 1544 10447 Thomas { 1545 10447 Thomas rfs4_dbe_hold(lg->lo_dbe); 1546 10447 Thomas } 1547 10447 Thomas 1548 10447 Thomas void 1549 10447 Thomas rfs41_lo_grant_rele(mds_layout_grant_t *lg) 1550 10447 Thomas { 1551 10447 Thomas rfs4_dbe_rele(lg->lo_dbe); 1552 7812 James } 1553 7812 James 1554 7812 James /* 1555 7812 James * ----------------------------------------------- 1556 7812 James * MDS: Ever Grant tables. 
1557 7812 James * ----------------------------------------------- 1558 7812 James * 1559 7812 James */ 1560 7812 James static uint32_t 1561 7812 James mds_ever_grant_hash(void *key) 1562 7812 James { 1563 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)key; 1564 10447 Thomas 1565 10447 Thomas return (ADDRHASH(eg->eg_cp) ^ ADDRHASH(eg->eg_key)); 1566 7812 James } 1567 7812 James 1568 7812 James static bool_t 1569 7812 James mds_ever_grant_compare(rfs4_entry_t u_entry, void *key) 1570 7812 James { 1571 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry; 1572 10447 Thomas mds_ever_grant_t *keg = (mds_ever_grant_t *)key; 1573 10447 Thomas 1574 10447 Thomas return (eg->eg_cp == keg->eg_cp && 1575 10447 Thomas eg->eg_fsid.val[0] == keg->eg_fsid.val[0] && 1576 10447 Thomas eg->eg_fsid.val[1] == keg->eg_fsid.val[1]); 1577 7812 James } 1578 7812 James 1579 7812 James static void * 1580 7812 James mds_ever_grant_mkkey(rfs4_entry_t entry) 1581 7812 James { 1582 7812 James return (entry); 1583 7812 James } 1584 7812 James 1585 7812 James static bool_t 1586 7812 James mds_ever_grant_fsid_compare(rfs4_entry_t entry, void *key) 1587 7812 James { 1588 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)entry; 1589 7812 James int64_t g_key = (int64_t)(uintptr_t)key; 1590 7812 James 1591 10447 Thomas return (eg->eg_key == g_key); 1592 7812 James } 1593 7812 James 1594 8981 James #ifdef NOT_USED_NOW 1595 8981 James static uint32_t 1596 8981 James mds_ever_grant_fsid_hash(void *key) 1597 8981 James { 1598 8981 James return ((uint32_t)(uintptr_t)key); 1599 8981 James } 1600 8981 James 1601 7812 James static void * 1602 7812 James mds_ever_grant_fsid_mkkey(rfs4_entry_t entry) 1603 7812 James { 1604 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)entry; 1605 10447 Thomas 1606 10447 Thomas return ((void*)(uintptr_t)eg->eg_key); 1607 7812 James } 1608 8981 James #endif 1609 7812 James 1610 7812 James /*ARGSUSED*/ 1611 7812 James static bool_t 1612 7812 
James mds_ever_grant_create(rfs4_entry_t u_entry, void *arg) 1613 7812 James { 1614 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry; 1615 10447 Thomas rfs4_client_t *cp = ((mds_ever_grant_t *)arg)->eg_cp; 1616 10447 Thomas 1617 10447 Thomas eg->eg_cp = cp; 1618 10447 Thomas eg->eg_fsid = ((mds_ever_grant_t *)arg)->eg_fsid; 1619 7812 James 1620 7812 James return (TRUE); 1621 7812 James } 1622 7812 James 1623 7812 James /*ARGSUSED*/ 1624 7812 James static void 1625 7812 James mds_ever_grant_destroy(rfs4_entry_t foo) 1626 7812 James { 1627 7812 James } 1628 7812 James 1629 7812 James mds_ever_grant_t * 1630 7812 James rfs41_findevergrant(rfs4_client_t *cp, vnode_t *vp, bool_t *create) 1631 7812 James { 1632 7812 James nfs_server_instance_t *instp; 1633 10447 Thomas mds_ever_grant_t args, *eg; 1634 10447 Thomas 1635 10447 Thomas instp = dbe_to_instp(cp->rc_dbe); 1636 10447 Thomas args.eg_cp = cp; 1637 10447 Thomas args.eg_fsid = vp->v_vfsp->vfs_fsid; 1638 10447 Thomas 1639 10447 Thomas eg = (mds_ever_grant_t *)rfs4_dbsearch( 1640 10447 Thomas instp->mds_ever_grant_idx, &args, create, &args, 1641 10447 Thomas RFS4_DBS_VALID); 1642 10447 Thomas 1643 10447 Thomas return (eg); 1644 10447 Thomas } 1645 10447 Thomas 1646 10447 Thomas void 1647 10447 Thomas rfs41_ever_grant_rele(mds_ever_grant_t *eg) 1648 10447 Thomas { 1649 10447 Thomas rfs4_dbe_rele(eg->eg_dbe); 1650 7812 James } 1651 7812 James 1652 7812 James void 1653 8439 James mds_kill_eg_callout(rfs4_entry_t u_entry, void *arg) 1654 8439 James { 1655 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry; 1656 8439 James rfs4_client_t *cp = (rfs4_client_t *)arg; 1657 8439 James 1658 10447 Thomas if (eg->eg_cp == cp) { 1659 10447 Thomas eg->eg_cp = NULL; 1660 10447 Thomas rfs4_dbe_invalidate(eg->eg_dbe); 1661 10447 Thomas rfs4_dbe_rele_nolock(eg->eg_dbe); 1662 8439 James } 1663 8439 James } 1664 8439 James 1665 8439 James void 1666 7812 James mds_clean_up_grants(rfs4_client_t *cp) 1667 7812 
James { 1668 10447 Thomas mds_layout_grant_t *lg; 1669 10447 Thomas nfs_server_instance_t *instp; 1670 10447 Thomas 1671 10447 Thomas rfs4_dbe_lock(cp->rc_dbe); 1672 10447 Thomas while (cp->rc_clientgrantlist.next->lg != NULL) { 1673 10447 Thomas lg = cp->rc_clientgrantlist.next->lg; 1674 10447 Thomas remque(&lg->lo_clientgrantlist); 1675 10447 Thomas lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev = 1676 10447 Thomas &lg->lo_clientgrantlist; 1677 10447 Thomas lg->lo_cp = NULL; 1678 10447 Thomas 1679 10447 Thomas rfs4_dbe_lock(lg->lo_fp->rf_dbe); 1680 10447 Thomas remque(&lg->lo_grant_list); 1681 10447 Thomas rfs4_dbe_unlock(lg->lo_fp->rf_dbe); 1682 10447 Thomas 1683 10447 Thomas lg->lo_grant_list.next = lg->lo_grant_list.prev = 1684 10447 Thomas &lg->lo_grant_list; 1685 10447 Thomas rfs4_file_rele(lg->lo_fp); 1686 10447 Thomas 1687 10447 Thomas lg->lo_fp = NULL; 1688 10447 Thomas rfs4_dbe_invalidate(lg->lo_dbe); 1689 10447 Thomas rfs41_lo_grant_rele(lg); 1690 10447 Thomas } 1691 10447 Thomas 1692 10447 Thomas instp = dbe_to_instp(cp->rc_dbe); 1693 10447 Thomas rfs4_dbe_unlock(cp->rc_dbe); 1694 9215 James 1695 9215 James rw_enter(&instp->mds_ever_grant_lock, RW_READER); 1696 8439 James rfs4_dbe_walk(instp->mds_ever_grant_tab, mds_kill_eg_callout, cp); 1697 9215 James rw_exit(&instp->mds_ever_grant_lock); 1698 9215 James } 1699 9215 James 1700 9215 James struct grant_arg { 1701 9215 James rfs4_client_t *cp; 1702 9215 James vnode_t *vp; 1703 9215 James }; 1704 9215 James 1705 9215 James void 1706 9215 James mds_rm_grant_callout(rfs4_entry_t u_entry, void *arg) 1707 9215 James { 1708 10455 Thomas mds_layout_grant_t *lg = (mds_layout_grant_t *)u_entry; 1709 10455 Thomas struct grant_arg *ga = (struct grant_arg *)arg; 1710 10455 Thomas vnode_t *vp; 1711 10455 Thomas 1712 10455 Thomas if (rfs4_dbe_skip_or_invalid(lg->lo_dbe)) { 1713 10455 Thomas ASSERT(lg->lo_fp == NULL); 1714 10455 Thomas return; 1715 10455 Thomas } 1716 10455 Thomas 1717 10455 Thomas 
ASSERT(lg->lo_fp != NULL); 1718 10455 Thomas vp = lg->lo_fp->rf_vp; 1719 10447 Thomas 1720 10447 Thomas if (ga->cp == lg->lo_cp && vp && ga->vp->v_vfsp == vp->v_vfsp) { 1721 10447 Thomas rfs4_dbe_lock(lg->lo_cp->rc_dbe); 1722 10447 Thomas remque(&lg->lo_clientgrantlist); 1723 10447 Thomas rfs4_dbe_unlock(lg->lo_cp->rc_dbe); 1724 10447 Thomas 1725 10447 Thomas lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev = 1726 10447 Thomas &lg->lo_clientgrantlist; 1727 10447 Thomas lg->lo_cp = NULL; 1728 10447 Thomas 1729 10447 Thomas rfs4_dbe_lock(lg->lo_fp->rf_dbe); 1730 10447 Thomas remque(&lg->lo_grant_list); 1731 10447 Thomas rfs4_dbe_unlock(lg->lo_fp->rf_dbe); 1732 10447 Thomas 1733 10447 Thomas lg->lo_grant_list.next = lg->lo_grant_list.prev = 1734 10447 Thomas &lg->lo_grant_list; 1735 10447 Thomas rfs4_file_rele(lg->lo_fp); 1736 10447 Thomas 1737 10447 Thomas lg->lo_fp = NULL; 1738 10447 Thomas rfs4_dbe_invalidate(lg->lo_dbe); 1739 10447 Thomas rfs4_dbe_rele_nolock(lg->lo_dbe); 1740 9215 James } 1741 9215 James } 1742 9215 James 1743 9215 James void 1744 9215 James mds_clean_grants_by_fsid(rfs4_client_t *cp, vnode_t *vp) 1745 9215 James { 1746 9215 James struct grant_arg ga; 1747 9215 James nfs_server_instance_t *instp; 1748 9215 James 1749 9215 James ga.cp = cp; 1750 9215 James ga.vp = vp; 1751 10447 Thomas instp = dbe_to_instp(cp->rc_dbe); 1752 9215 James 1753 9215 James rw_enter(&instp->mds_layout_grant_lock, RW_READER); 1754 9215 James rfs4_dbe_walk(instp->mds_layout_grant_tab, mds_rm_grant_callout, &ga); 1755 9215 James rw_exit(&instp->mds_layout_grant_lock); 1756 7739 jwahlig } 1757 7739 jwahlig 1758 8035 James /* 1759 8035 James * Conforms to Section 12.5.5.2.1.4 of draft-25 1760 8035 James */ 1761 8035 James void 1762 8035 James rfs41_lo_seqid(stateid_t *sp) 1763 8035 James { 1764 8035 James if (sp == NULL) 1765 8035 James return; 1766 8035 James 1767 8035 James if ((sp->v41_bits.chgseq + 1) & (uint32_t)~0) 1768 8035 James 
atomic_inc_32(&sp->v41_bits.chgseq); 1769 8035 James else 1770 8035 James (void) atomic_swap_32(&sp->v41_bits.chgseq, 1); 1771 8035 James } 1772 8035 James 1773 8041 rick bool_t 1774 10447 Thomas rfs41_lo_still_granted(mds_layout_grant_t *lg) 1775 8041 rick { 1776 8441 rick bool_t found = TRUE; 1777 8041 rick 1778 8439 James /* 1779 8439 James * We currently have the layout grant, but is it still valid? 1780 8439 James * If it has been returned, then the status will be updated as 1781 8439 James * returned or recalled. However, it is possible that the client 1782 8439 James * has gone away while we are still holding this. When the client 1783 8439 James * is cleaned up, the pointer to the client and the file will be 1784 8439 James * set to NULL and it will have been removed from all lists, waiting 1785 8439 James * to be released and reaped. In this case, the status may not 1786 8439 James * have been updated. 1787 8439 James */ 1788 10447 Thomas rfs4_dbe_lock(lg->lo_dbe); 1789 10447 Thomas if (lg->lo_status == LO_RETURNED || lg->lo_status == LO_RECALLED || 1790 10447 Thomas lg->lo_cp == NULL) 1791 8439 James found = FALSE; 1792 10447 Thomas rfs4_dbe_unlock(lg->lo_dbe); 1793 8441 rick 1794 8041 rick return (found); 1795 8041 rick } 1796 8041 rick 1797 8041 rick static void 1798 10447 Thomas rfs41_revoke_layout(mds_layout_grant_t *lg) 1799 8041 rick { 1800 8041 rick cmn_err(CE_NOTE, "rfs41_revoke_layout: layout revoked"); 1801 10447 Thomas rfs41_seq4_hold(&lg->lo_cp->rc_seq4, 1802 10447 Thomas SEQ4_STATUS_RECALLABLE_STATE_REVOKED); 1803 8041 rick 1804 8041 rick /* XXX - rest of this function TBD */ 1805 8041 rick } 1806 8041 rick 1807 6741 th199096 static void 1808 6741 th199096 mds_do_lorecall(mds_lorec_t *lorec) 1809 6741 th199096 { 1810 7397 rick CB_COMPOUND4args cb4_args; 1811 7397 rick CB_COMPOUND4res cb4_res; 1812 7397 rick CB_SEQUENCE4args *cbsap; 1813 7397 rick CB_LAYOUTRECALL4args *cblrap; 1814 7739 jwahlig nfs_cb_argop4 *argops; 1815 7739 jwahlig struct 
timeval timeout; 1816 7739 jwahlig enum clnt_stat call_stat = RPC_FAILED; 1817 7739 jwahlig int zilch = 0; 1818 6741 th199096 layoutrecall_file4 *lorf; 1819 7397 rick CLIENT *ch; 1820 7397 rick int numops; 1821 7397 rick int argsz; 1822 10448 Thomas mds_session_t *sp; 1823 7739 jwahlig slot_ent_t *p; 1824 10447 Thomas mds_layout_grant_t *lg; 1825 8041 rick uint32_t sc = 0; 1826 8041 rick int retried = 0; 1827 6741 th199096 1828 6741 th199096 DTRACE_PROBE1(nfssrv__i__sess_lorecall_fh, mds_lorec_t *, lorec); 1829 10448 Thomas if ((sp = lorec->lor_sess) == NULL) { 1830 8041 rick kmem_free(lorec, sizeof (mds_lorec_t)); 1831 8041 rick return; 1832 6741 th199096 1833 10448 Thomas } else if (!SN_CB_CHAN_EST(sp)) { 1834 8041 rick kmem_free(lorec, sizeof (mds_lorec_t)); 1835 10448 Thomas rfs41_session_rele(sp); 1836 7397 rick return; 1837 8041 rick } 1838 7397 rick 1839 7397 rick /* 1840 8041 rick * Per-type pre-processing 1841 7512 rick */ 1842 8041 rick switch (lorec->lor_type) { 1843 8041 rick case LAYOUTRECALL4_FILE: 1844 10447 Thomas if (lorec->lor_lg == NULL) 1845 8041 rick return; 1846 10447 Thomas lg = lorec->lor_lg; 1847 10447 Thomas break; 1848 10447 Thomas 1849 10447 Thomas case LAYOUTRECALL4_FSID: 1850 10448 Thomas sp->sn_clnt->rc_bulk_recall = LAYOUTRETURN4_FSID; 1851 8041 rick break; 1852 8041 rick 1853 8041 rick case LAYOUTRECALL4_ALL: 1854 10448 Thomas sp->sn_clnt->rc_bulk_recall = LAYOUTRETURN4_ALL; 1855 9215 James break; 1856 8041 rick default: 1857 8041 rick break; 1858 8041 rick } 1859 7512 rick 1860 7512 rick /* 1861 7397 rick * set up the compound args 1862 7397 rick */ 1863 7397 rick numops = 2; /* CB_SEQUENCE + CB_LAYOUTRECALL */ 1864 7397 rick argsz = numops * sizeof (nfs_cb_argop4); 1865 7397 rick argops = kmem_zalloc(argsz, KM_SLEEP); 1866 6741 th199096 1867 6741 th199096 argops[0].argop = OP_CB_SEQUENCE; 1868 7397 rick cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence; 1869 6741 th199096 1870 6741 th199096 argops[1].argop = OP_CB_LAYOUTRECALL; 1871 
7397 rick cblrap = &argops[1].nfs_cb_argop4_u.opcblayoutrecall; 1872 7397 rick 1873 7397 rick (void) str_to_utf8("cb_lo_recall", &cb4_args.tag); 1874 7397 rick cb4_args.minorversion = CB4_MINOR_v1; 1875 7397 rick 1876 10448 Thomas cb4_args.callback_ident = sp->sn_bc.progno; 1877 7397 rick cb4_args.array_len = numops; 1878 7397 rick cb4_args.array = argops; 1879 6741 th199096 1880 6741 th199096 cb4_res.tag.utf8string_val = NULL; 1881 6741 th199096 cb4_res.array = NULL; 1882 6741 th199096 1883 6741 th199096 /* 1884 7397 rick * CB_SEQUENCE 1885 6741 th199096 */ 1886 10448 Thomas bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4)); 1887 10448 Thomas p = svc_slot_alloc(sp); 1888 7397 rick mutex_enter(&p->se_lock); 1889 7397 rick cbsap->csa_slotid = p->se_sltno; 1890 7397 rick cbsap->csa_sequenceid = p->se_seqid; 1891 10448 Thomas cbsap->csa_highest_slotid = svc_slot_maxslot(sp); 1892 7397 rick cbsap->csa_cachethis = FALSE; 1893 6741 th199096 1894 7397 rick /* no referring calling list for lo recall */ 1895 7397 rick cbsap->csa_rcall_llen = 0; 1896 7397 rick cbsap->csa_rcall_lval = NULL; 1897 7397 rick mutex_exit(&p->se_lock); 1898 6741 th199096 1899 6741 th199096 /* 1900 7397 rick * CB_LAYOUTRECALL 1901 7397 rick * 1902 6741 th199096 * clora_change: 1903 7397 rick * 1: server prefers that client write modified data through 1904 7397 rick * MDS when pushing modified data due to layout recall 1905 6741 th199096 * 0: server has no DS/MDS preference 1906 6741 th199096 */ 1907 7397 rick cblrap->clora_type = LAYOUT4_NFSV4_1_FILES; 1908 7397 rick cblrap->clora_iomode = LAYOUTIOMODE4_ANY; 1909 7397 rick cblrap->clora_changed = 0; 1910 7397 rick cblrap->clora_recall.lor_recalltype = lorec->lor_type; 1911 6741 th199096 1912 6741 th199096 switch (lorec->lor_type) { 1913 6741 th199096 case LAYOUTRECALL4_FILE: 1914 7397 rick lorf = &cblrap->clora_recall.layoutrecall4_u.lor_layout; 1915 6741 th199096 lorf->lor_offset = 0; 1916 6741 th199096 lorf->lor_length = ONES_64; 
1917 6741 th199096 lorf->lor_fh.nfs_fh4_len = lorec->lor_fh.fh_len; 1918 6741 th199096 lorf->lor_fh.nfs_fh4_val = (char *)&lorec->lor_fh.fh_buf; 1919 8041 rick bcopy(&lorec->lor_stid, &lorf->lor_stateid, sizeof (stateid4)); 1920 10447 Thomas (void) atomic_swap_32(&lg->lor_reply, 0); 1921 6741 th199096 break; 1922 6741 th199096 1923 6741 th199096 case LAYOUTRECALL4_FSID: 1924 7397 rick cblrap->clora_recall.layoutrecall4_u.lor_fsid = lorec->lor_fsid; 1925 6741 th199096 break; 1926 6741 th199096 1927 6741 th199096 case LAYOUTRECALL4_ALL: 1928 6741 th199096 default: 1929 6741 th199096 break; 1930 6741 th199096 } 1931 6741 th199096 1932 6741 th199096 /* 1933 6741 th199096 * Set up the timeout for the callback and make the actual call. 1934 6741 th199096 * Timeout will be 80% of the lease period. 1935 6741 th199096 */ 1936 10448 Thomas timeout.tv_sec = (dbe_to_instp(sp->sn_dbe)->lease_period * 80) / 100; 1937 6741 th199096 timeout.tv_usec = 0; 1938 8041 rick retry: 1939 10448 Thomas ch = rfs41_cb_getch(sp); 1940 7397 rick (void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch); 1941 7397 rick call_stat = clnt_call(ch, CB_COMPOUND, 1942 7397 rick xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args, 1943 7397 rick xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout); 1944 10448 Thomas rfs41_cb_freech(sp, ch); 1945 6741 th199096 1946 7397 rick if (call_stat != RPC_SUCCESS) { 1947 8041 rick switch (lorec->lor_type) { 1948 8041 rick case LAYOUTRECALL4_FILE: 1949 8041 rick if (!retried) 1950 8041 rick delay(SEC_TO_TICK(rfs4_lease_time)); 1951 8041 rick 1952 10447 Thomas if (rfs41_lo_still_granted(lg)) { 1953 8041 rick if (!retried) { 1954 8041 rick retried = 1; 1955 8041 rick goto retry; 1956 8041 rick } 1957 8041 rick 1958 8041 rick /* 1959 8041 rick * We want to make sure that the layout is 1960 8041 rick * still granted lest we assert a SEQ4 flag 1961 8041 rick * that will never be turned off. 
1962 8041 rick */ 1963 10447 Thomas rfs41_revoke_layout(lg); 1964 8041 rick } 1965 8041 rick sc = (call_stat == RPC_CANTSEND || 1966 8041 rick call_stat == RPC_CANTRECV); 1967 10448 Thomas rfs41_cb_path_down(sp, sc); 1968 8041 rick goto done; 1969 8041 rick 1970 8041 rick case LAYOUTRECALL4_FSID: 1971 8041 rick case LAYOUTRECALL4_ALL: 1972 10448 Thomas sp->sn_clnt->rc_bulk_recall = 0; 1973 8041 rick /* 1974 8041 rick * XXX - how do we determine if layouts still 1975 8041 rick * outstanding for fsid/all cases ? 1976 8041 rick */ 1977 8041 rick default: 1978 8041 rick break; 1979 8041 rick } 1980 8041 rick 1981 8041 rick } else { /* RPC_SUCCESS */ 1982 8041 rick 1983 7397 rick /* 1984 8041 rick * Per-type results processing 1985 7397 rick */ 1986 8041 rick switch (lorec->lor_type) { 1987 8041 rick case LAYOUTRECALL4_FILE: 1988 10447 Thomas (void) atomic_swap_32(&lg->lor_reply, 1); 1989 8041 rick break; 1990 6741 th199096 1991 8041 rick case LAYOUTRECALL4_FSID: 1992 8041 rick case LAYOUTRECALL4_ALL: 1993 8041 rick default: 1994 8041 rick break; 1995 8041 rick } 1996 8041 rick } 1997 8041 rick 1998 8041 rick if (cb4_res.status != NFS4_OK) { 1999 8041 rick nfsstat4 s = cb4_res.status; 2000 8041 rick 2001 8041 rick switch (s) { 2002 8041 rick case NFS4ERR_BADHANDLE: 2003 8041 rick case NFS4ERR_BADIOMODE: 2004 8041 rick case NFS4ERR_BADXDR: 2005 8041 rick case NFS4ERR_INVAL: 2006 8041 rick case NFS4ERR_NOMATCHING_LAYOUT: 2007 8041 rick case NFS4ERR_NOTSUPP: 2008 8041 rick case NFS4ERR_OP_NOT_IN_SESSION: 2009 8041 rick case NFS4ERR_REP_TOO_BIG: 2010 8041 rick case NFS4ERR_REP_TOO_BIG_TO_CACHE: 2011 8041 rick case NFS4ERR_REQ_TOO_BIG: 2012 8041 rick case NFS4ERR_TOO_MANY_OPS: 2013 8041 rick case NFS4ERR_UNKNOWN_LAYOUTTYPE: 2014 8041 rick case NFS4ERR_WRONG_TYPE: 2015 8041 rick /* What do we do when it's our own fault ? 
*/ 2016 8041 rick cmn_err(CE_NOTE, "cb_lo_recall: %s", nfs41_strerror(s)); 2017 8041 rick break; 2018 8041 rick 2019 8041 rick case NFS4ERR_DELAY: 2020 8041 rick switch (lorec->lor_type) { 2021 8041 rick case LAYOUTRECALL4_FILE: 2022 8041 rick { 2023 8041 rick bool_t granted = FALSE; 2024 8041 rick 2025 8041 rick if (!retried) 2026 8041 rick delay(SEC_TO_TICK(rfs4_lease_time)); 2027 8041 rick 2028 10447 Thomas granted = rfs41_lo_still_granted(lg); 2029 8041 rick if (!granted) 2030 8041 rick break; 2031 8041 rick 2032 8041 rick if (!retried) { 2033 8041 rick retried = 1; 2034 8041 rick goto retry; 2035 8041 rick } 2036 8041 rick 2037 8041 rick if (granted) 2038 10447 Thomas rfs41_revoke_layout(lg); 2039 8041 rick break; 2040 8041 rick } 2041 8041 rick 2042 8041 rick case LAYOUTRECALL4_FSID: 2043 8041 rick case LAYOUTRECALL4_ALL: 2044 8041 rick default: 2045 8041 rick break; 2046 8041 rick } 2047 8041 rick break; 2048 8041 rick 2049 8041 rick case NFS4ERR_BAD_STATEID: /* XXX - retry BAD_STATEID ? 
*/ 2050 8041 rick default: 2051 8041 rick if (lorec->lor_type == LAYOUTRECALL4_FILE) 2052 10447 Thomas if (rfs41_lo_still_granted(lg)) 2053 10447 Thomas rfs41_revoke_layout(lg); 2054 8041 rick break; 2055 8041 rick } 2056 7397 rick 2057 6741 th199096 } 2058 7397 rick svc_slot_cb_seqid(&cb4_res, p); 2059 7397 rick done: 2060 6741 th199096 kmem_free(lorec, sizeof (mds_lorec_t)); 2061 8041 rick rfs4freeargres(&cb4_args, &cb4_res); 2062 8041 rick 2063 10448 Thomas svc_slot_free(sp, p); 2064 10448 Thomas rfs41_session_rele(sp); 2065 8041 rick 2066 8041 rick /* 2067 8041 rick * Per-type post-processing 2068 8041 rick */ 2069 8041 rick switch (lorec->lor_type) { 2070 8041 rick case LAYOUTRECALL4_FILE: 2071 10447 Thomas rfs41_lo_grant_rele(lg); 2072 8041 rick break; 2073 8041 rick 2074 8041 rick case LAYOUTRECALL4_FSID: 2075 8041 rick case LAYOUTRECALL4_ALL: 2076 8041 rick default: 2077 8041 rick break; 2078 8041 rick } 2079 6741 th199096 } 2080 6741 th199096 2081 8041 rick /* 2082 8041 rick * Bulk Layout Recall (ALL) 2083 8041 rick */ 2084 6741 th199096 static void 2085 8041 rick all_lor(rfs4_entry_t entry, void *args) 2086 8041 rick { 2087 10448 Thomas mds_session_t *sp = (mds_session_t *)entry; 2088 8041 rick mds_lorec_t *lrp = (mds_lorec_t *)args; 2089 8041 rick mds_lorec_t *lorec; 2090 8041 rick 2091 10448 Thomas if (sp == NULL || lrp == NULL) 2092 10448 Thomas return; 2093 10448 Thomas 2094 10448 Thomas ASSERT(rfs4_dbe_islocked(sp->sn_dbe)); 2095 8041 rick lorec = kmem_zalloc(sizeof (mds_lorec_t), KM_SLEEP); 2096 8041 rick bcopy(args, lorec, sizeof (mds_lorec_t)); 2097 8041 rick 2098 10448 Thomas rfs4_dbe_hold(sp->sn_dbe); 2099 10448 Thomas lorec->lor_sess = sp; 2100 8041 rick 2101 8041 rick (void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, TS_RUN, 2102 8041 rick minclsyspri); 2103 8041 rick } 2104 8041 rick 2105 8041 rick /* 2106 8041 rick * Layout Recall by FSID 2107 8041 rick */ 2108 8041 rick static void 2109 10016 Thomas fsid_lor(rfs4_entry_t 
u_entry, void *args) 2110 8041 rick { 2111 8041 rick mds_lorec_t *lrp = (mds_lorec_t *)args; 2112 10447 Thomas mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry; 2113 10447 Thomas mds_ever_grant_t key; 2114 8041 rick vnode_t *vp = NULL; 2115 8041 rick 2116 10447 Thomas if (eg == NULL || lrp == NULL || rfs4_dbe_is_invalid(eg->eg_dbe)) 2117 10447 Thomas return; 2118 10447 Thomas 2119 10447 Thomas ASSERT(rfs4_dbe_islocked(eg->eg_dbe)); 2120 8041 rick if ((vp = (vnode_t *)lrp->lor_vp) == NULL) 2121 8041 rick return; 2122 8041 rick 2123 10447 Thomas key.eg_fsid = vp->v_vfsp->vfs_fsid; 2124 10016 Thomas if (mds_ever_grant_fsid_compare(u_entry, 2125 10447 Thomas (void *)(uintptr_t)key.eg_key)) { 2126 8041 rick mds_lorec_t *lorec; 2127 10448 Thomas mds_session_t *sp; 2128 10016 Thomas nfs_server_instance_t *instp; 2129 10016 Thomas 2130 10016 Thomas instp = dbe_to_instp(u_entry->dbe); 2131 8041 rick 2132 8041 rick lorec = kmem_zalloc(sizeof (mds_lorec_t), KM_SLEEP); 2133 8041 rick bcopy(args, lorec, sizeof (mds_lorec_t)); 2134 8041 rick 2135 10447 Thomas ASSERT(eg->eg_cp != NULL); 2136 10448 Thomas sp = mds_findsession_by_clid(instp, eg->eg_cp->rc_clientid); 2137 10448 Thomas if (sp == NULL) { 2138 8041 rick kmem_free(lorec, sizeof (mds_lorec_t)); 2139 8041 rick return; 2140 8041 rick } 2141 10448 Thomas lorec->lor_sess = sp; /* hold courtesy of findsession */ 2142 8041 rick 2143 8041 rick (void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, 2144 8041 rick TS_RUN, minclsyspri); 2145 8041 rick } 2146 8041 rick } 2147 8041 rick 2148 8041 rick /* 2149 8041 rick * Layout Recall by File 2150 8041 rick */ 2151 8041 rick static void 2152 8041 rick file_lor(rfs4_entry_t entry, void *arg) 2153 6741 th199096 { 2154 6741 th199096 mds_lorec_t *lorec; 2155 6741 th199096 2156 6741 th199096 lorec = kmem_alloc(sizeof (mds_lorec_t), KM_SLEEP); 2157 6741 th199096 bcopy(arg, lorec, sizeof (mds_lorec_t)); 2158 8041 rick lorec->lor_sess = (mds_session_t *)entry; 2159 6741 th199096 
2160 8041 rick (void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, TS_RUN, 2161 8041 rick minclsyspri); 2162 7739 jwahlig } 2163 7739 jwahlig 2164 8312 webaker 2165 6741 th199096 /* 2166 7739 jwahlig * Recall a layout: 2167 7739 jwahlig * 2168 7739 jwahlig * Either all layouts 2169 7739 jwahlig * 2170 7739 jwahlig * ... or 2171 7739 jwahlig * 2172 7739 jwahlig * For a given pathname construct FH first (same thing we do 2173 7739 jwahlig * for nfs_sys(GETFH)) args have already been copied into kernel 2174 7739 jwahlig * adspace 2175 6741 th199096 */ 2176 6741 th199096 static int 2177 6741 th199096 mds_lorecall_cmd(struct mds_reclo_args *args, cred_t *cr) 2178 6741 th199096 { 2179 8041 rick int error; 2180 8041 rick nfs_fh4 fh4; 2181 8041 rick struct exportinfo *exi; 2182 8041 rick mds_lorec_t lorec; 2183 8041 rick vnode_t *vp = NULL; 2184 8041 rick vnode_t *dvp = NULL; 2185 8041 rick rfs4_file_t *fp = NULL; 2186 8041 rick rfs4_client_t *cp = NULL; 2187 8041 rick rfs41_grant_list_t *glp = NULL; 2188 10448 Thomas mds_session_t *sp = NULL; 2189 8041 rick 2190 8041 rick lorec.lor_type = args->lo_type; 2191 8041 rick switch (args->lo_type) { 2192 8041 rick case LAYOUTRECALL4_ALL: 2193 8041 rick if (mds_server->mds_session_tab == NULL) 2194 8041 rick return (ECANCELED); 2195 8041 rick 2196 8041 rick rfs4_dbe_walk(mds_server->mds_session_tab, all_lor, &lorec); 2197 8041 rick return (0); 2198 8041 rick 2199 8041 rick case LAYOUTRECALL4_FILE: 2200 8041 rick case LAYOUTRECALL4_FSID: 2201 8041 rick break; 2202 8041 rick 2203 8041 rick default: 2204 8041 rick return (EINVAL); 2205 8041 rick } 2206 8041 rick 2207 8041 rick if (error = lookupname(args->lo_fname, UIO_SYSSPACE, FOLLOW, &dvp, &vp)) 2208 8041 rick return (error); 2209 8041 rick 2210 8041 rick if (vp == NULL) { 2211 8041 rick if (dvp != NULL) 2212 8041 rick VN_RELE(dvp); 2213 8041 rick return (ENOENT); 2214 8041 rick } 2215 6741 th199096 2216 7397 rick /* 2217 8041 rick * 'vp' may be an AUTOFS node, so we 
perform a VOP_ACCESS() 2218 8041 rick * to trigger the mount of the intended filesystem, so we 2219 8041 rick * can share the intended filesystem instead of the AUTOFS 2220 8041 rick * filesystem. 2221 6741 th199096 */ 2222 6741 th199096 (void) VOP_ACCESS(vp, 0, 0, cr, NULL); 2223 6741 th199096 2224 6741 th199096 /* 2225 8041 rick * We're interested in the top most filesystem. This is 2226 8041 rick * specially important when uap->dname is a trigger AUTOFS 2227 8041 rick * node, since we're really interested in sharing the 2228 6741 th199096 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 2229 8041 rick * call, not the AUTOFS node itself. 2230 6741 th199096 */ 2231 6741 th199096 if (vn_mountedvfs(vp) != NULL) { 2232 6741 th199096 if (error = traverse(&vp)) 2233 6741 th199096 goto errout; 2234 6741 th199096 } 2235 6741 th199096 2236 6741 th199096 /* 2237 8041 rick * The last arg for nfs_vptoexi says to create a v4 FH 2238 8041 rick * (instead of v3). This will need to be changed to 2239 8041 rick * select the new MDS FH format. 2240 6741 th199096 */ 2241 6741 th199096 rw_enter(&exported_lock, RW_READER); 2242 6741 th199096 exi = nfs_vptoexi(dvp, vp, cr, NULL, &error, TRUE); 2243 6741 th199096 rw_exit(&exported_lock); 2244 6741 th199096 2245 6741 th199096 /* 2246 6741 th199096 * file isn't shared. 
2247 6741 th199096 */ 2248 6741 th199096 if (exi == NULL) 2249 6741 th199096 goto errout; 2250 6741 th199096 2251 6741 th199096 fh4.nfs_fh4_val = lorec.lor_fh.fh_buf; 2252 6741 th199096 error = mknfs41_fh(&fh4, vp, exi); 2253 6741 th199096 lorec.lor_fh.fh_len = fh4.nfs_fh4_len; 2254 6741 th199096 lorec.lor_sess = NULL; 2255 6741 th199096 2256 8041 rick switch (lorec.lor_type) { 2257 8041 rick case LAYOUTRECALL4_FILE: 2258 10462 Thomas mutex_enter(&vp->v_vsd_lock); 2259 8041 rick fp = (rfs4_file_t *)vsd_get(vp, mds_server->vkey); 2260 10462 Thomas mutex_exit(&vp->v_vsd_lock); 2261 8041 rick if (fp == NULL) { 2262 8041 rick error = EIO; 2263 8041 rick goto errout; 2264 8041 rick } 2265 6741 th199096 2266 8041 rick /* 2267 8041 rick * There may be a cleaner way to run the per-file lists, 2268 8041 rick * but this works for now. This sends a cb_lo_recall to 2269 8041 rick * the clients that have an active layout for the file, 2270 8041 rick * only. Stop the blasting ! 2271 8041 rick */ 2272 10447 Thomas glp = fp->rf_lo_grant_list.next; 2273 10447 Thomas for (; glp && glp->lg; glp = glp->next) { 2274 10447 Thomas 2275 10447 Thomas if ((cp = glp->lg->lo_cp) == NULL) 2276 8041 rick continue; /* internal inconsistency ? */ 2277 6741 th199096 2278 10447 Thomas rfs41_lo_grant_hold(glp->lg); 2279 10448 Thomas sp = mds_findsession_by_clid(mds_server, 2280 10447 Thomas cp->rc_clientid); 2281 10448 Thomas if (sp != NULL) { 2282 8041 rick /* 2283 8041 rick * Recall in progress ! 2284 8041 rick * 2285 8041 rick * As per spec rules, bump up the seqid (of 2286 8041 rick * the stateid) and make sure we store it in 2287 8041 rick * the layout grant info; this will eventually 2288 8041 rick * be used for layout race detection. 
2289 8041 rick */ 2290 10447 Thomas rfs4_dbe_lock(glp->lg->lo_dbe); 2291 10447 Thomas 2292 10447 Thomas glp->lg->lo_status = LO_RECALL_INPROG; 2293 10447 Thomas rfs41_lo_seqid(&glp->lg->lo_stateid); 2294 10447 Thomas 2295 10447 Thomas mutex_enter(&glp->lg->lo_lock); 2296 10447 Thomas glp->lg->lor_seqid = 2297 10447 Thomas glp->lg->lo_stateid.v41_bits.chgseq; 2298 10447 Thomas mutex_exit(&glp->lg->lo_lock); 2299 10447 Thomas 2300 10447 Thomas bcopy(&glp->lg->lo_stateid.stateid, 2301 8041 rick &lorec.lor_stid, sizeof (stateid4)); 2302 10447 Thomas lorec.lor_lg = glp->lg; 2303 10447 Thomas rfs41_lo_grant_hold(glp->lg); 2304 10447 Thomas 2305 10447 Thomas rfs4_dbe_unlock(glp->lg->lo_dbe); 2306 10448 Thomas file_lor((rfs4_entry_t)sp, (void *)&lorec); 2307 10447 Thomas } 2308 10447 Thomas rfs41_lo_grant_rele(glp->lg); 2309 8041 rick } 2310 8041 rick break; 2311 8041 rick 2312 8041 rick case LAYOUTRECALL4_FSID: 2313 8041 rick /* 2314 8041 rick * set fsid just like rfs4_fattr4_fsid() 2315 8041 rick */ 2316 8041 rick if (exi->exi_volatile_dev) { 2317 8041 rick int *pmaj = (int *)&lorec.lor_fsid.major; 2318 8041 rick 2319 8041 rick pmaj[0] = exi->exi_fsid.val[0]; 2320 8041 rick pmaj[1] = exi->exi_fsid.val[1]; 2321 8041 rick lorec.lor_fsid.minor = 0; 2322 8041 rick } else { 2323 8041 rick vattr_t va; 2324 8041 rick 2325 8041 rick va.va_mask = AT_FSID | AT_TYPE; 2326 8041 rick error = rfs4_vop_getattr(vp, &va, 0, cr); 2327 8041 rick 2328 8041 rick if (error == 0 && va.va_type != VREG) 2329 8041 rick error = EINVAL; 2330 8041 rick if (error) 2331 8041 rick goto errout; 2332 8041 rick 2333 8041 rick lorec.lor_fsid.major = getmajor(va.va_fsid); 2334 8041 rick lorec.lor_fsid.minor = getminor(va.va_fsid); 2335 8041 rick } 2336 8041 rick 2337 8041 rick if (mds_server->mds_ever_grant_tab == NULL) { 2338 8041 rick error = ECANCELED; 2339 6741 th199096 goto errout; 2340 8041 rick } 2341 6741 th199096 2342 8041 rick lorec.lor_vp = vp; 2343 8041 rick VN_HOLD(vp); 2344 8041 rick 
rfs4_dbe_walk(mds_server->mds_ever_grant_tab, fsid_lor, &lorec); 2345 8041 rick VN_RELE(vp); 2346 8041 rick break; 2347 8041 rick 2348 8041 rick default: 2349 8041 rick break; 2350 6741 th199096 } 2351 6741 th199096 2352 6741 th199096 errout: 2353 6741 th199096 VN_RELE(vp); 2354 6741 th199096 if (dvp != NULL) 2355 6741 th199096 VN_RELE(dvp); 2356 6741 th199096 return (error); 2357 8312 webaker } 2358 8312 webaker 2359 8312 webaker /* support for device notifications via mdsadm */ 2360 8312 webaker 2361 8312 webaker typedef struct mds_notify_device { 2362 8312 webaker mds_session_t *nd_sess; 2363 8312 webaker struct mds_notifydev_args nd_args; 2364 8312 webaker 2365 8312 webaker } mds_notify_device_t; 2366 8312 webaker 2367 8312 webaker static void 2368 8312 webaker mds_do_notify_device(mds_notify_device_t *ndp) 2369 8312 webaker { 2370 8312 webaker CB_COMPOUND4args cb4_args; 2371 8312 webaker CB_COMPOUND4res cb4_res; 2372 8312 webaker CB_SEQUENCE4args *cbsap; 2373 8312 webaker CB_NOTIFY_DEVICEID4args *cbndap; 2374 8312 webaker nfs_cb_argop4 *argops; 2375 8312 webaker struct timeval timeout; 2376 8312 webaker enum clnt_stat call_stat = RPC_FAILED; 2377 8312 webaker int zilch = 0; 2378 8312 webaker CLIENT *ch; 2379 8312 webaker int numops; 2380 8312 webaker int argsz; 2381 10448 Thomas mds_session_t *sp; 2382 8312 webaker slot_ent_t *p; 2383 8312 webaker notify4 no; 2384 8312 webaker char *xdr_buf = NULL; 2385 8312 webaker int xdr_size; 2386 8312 webaker XDR xdr; 2387 8312 webaker 2388 8312 webaker DTRACE_PROBE1(nfssrv__i__sess_notify_device, mds_notify_device_t *, 2389 8312 webaker ndp); 2390 8312 webaker 2391 8312 webaker if (ndp->nd_sess == NULL) 2392 8312 webaker return; 2393 10448 Thomas sp = ndp->nd_sess; 2394 8312 webaker 2395 8312 webaker /* 2396 8312 webaker * XXX - until we fix blasting _all_ sessions for one notification, 2397 8312 webaker * make sure that the session in question at least has the 2398 8312 webaker * back chan established. 
2399 8312 webaker */ 2400 10448 Thomas if (!SN_CB_CHAN_EST(sp)) 2401 8312 webaker return; 2402 8312 webaker 2403 8312 webaker /* 2404 8312 webaker * set up the compound args 2405 8312 webaker */ 2406 8312 webaker numops = 2; /* CB_SEQUENCE + CB_NOTIFY_DEVICE */ 2407 8312 webaker argsz = numops * sizeof (nfs_cb_argop4); 2408 8312 webaker argops = kmem_zalloc(argsz, KM_SLEEP); 2409 8312 webaker 2410 8312 webaker argops[0].argop = OP_CB_SEQUENCE; 2411 8312 webaker cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence; 2412 8312 webaker 2413 8312 webaker argops[1].argop = OP_CB_NOTIFY_DEVICEID; 2414 8312 webaker cbndap = &argops[1].nfs_cb_argop4_u.opcbnotify_deviceid; 2415 8312 webaker 2416 8312 webaker (void) str_to_utf8("cb_notify_device", &cb4_args.tag); 2417 8312 webaker cb4_args.minorversion = CB4_MINOR_v1; 2418 8312 webaker 2419 10448 Thomas cb4_args.callback_ident = sp->sn_bc.progno; 2420 8312 webaker cb4_args.array_len = numops; 2421 8312 webaker cb4_args.array = argops; 2422 8312 webaker 2423 8312 webaker cb4_res.tag.utf8string_val = NULL; 2424 8312 webaker cb4_res.array = NULL; 2425 8312 webaker 2426 8312 webaker /* 2427 8312 webaker * CB_SEQUENCE 2428 8312 webaker */ 2429 10448 Thomas bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4)); 2430 10448 Thomas p = svc_slot_alloc(sp); 2431 8312 webaker mutex_enter(&p->se_lock); 2432 8312 webaker cbsap->csa_slotid = p->se_sltno; 2433 8312 webaker cbsap->csa_sequenceid = p->se_seqid; 2434 10448 Thomas cbsap->csa_highest_slotid = svc_slot_maxslot(sp); 2435 8312 webaker cbsap->csa_cachethis = FALSE; 2436 8312 webaker 2437 8312 webaker /* no referring calling list for device notifications */ 2438 8312 webaker cbsap->csa_rcall_llen = 0; 2439 8312 webaker cbsap->csa_rcall_lval = NULL; 2440 8312 webaker mutex_exit(&p->se_lock); 2441 8312 webaker 2442 8312 webaker /* 2443 8312 webaker * CB_NOTIFY_DEVICEID (well, d'uh) 2444 8312 webaker */ 2445 8312 webaker cbndap->cnda_changes.cnda_changes_len = 1; 2446 8312 webaker 
cbndap->cnda_changes.cnda_changes_val = &no; 2447 8312 webaker if (ndp->nd_args.notify_how == NOTIFY_DEVICEID4_DELETE) { 2448 8312 webaker notify_deviceid_delete4 nodd; 2449 8312 webaker 2450 8312 webaker no.notify_mask = NOTIFY_DEVICEID4_DELETE_MASK; 2451 8312 webaker nodd.ndd_layouttype = LAYOUT4_NFSV4_1_FILES; 2452 9209 Jeff (void) memset(&nodd.ndd_deviceid, 0, sizeof (deviceid4)); 2453 8312 webaker bcopy(&ndp->nd_args.dev_id, &nodd.ndd_deviceid, 2454 8312 webaker sizeof (ndp->nd_args.dev_id)); 2455 8312 webaker 2456 8312 webaker /* encode the notification blob */ 2457 8312 webaker 2458 8312 webaker xdr_size = xdr_sizeof(xdr_notify_deviceid_delete4, &nodd); 2459 8312 webaker ASSERT(xdr_size); 2460 8312 webaker xdr_buf = kmem_alloc(xdr_size, KM_SLEEP); 2461 8312 webaker xdrmem_create(&xdr, xdr_buf, xdr_size, XDR_ENCODE); 2462 8312 webaker 2463 8312 webaker if (xdr_notify_deviceid_delete4(&xdr, &nodd) == FALSE) 2464 8312 webaker goto done; 2465 8312 webaker 2466 8312 webaker /* 2467 8312 webaker * Once the blob is encoded, we no longer need 2468 8312 webaker * nodd, which goes out of scope here. 
2469 8312 webaker */ 2470 8312 webaker 2471 8312 webaker } else { 2472 8312 webaker notify_deviceid_change4 nodc; 2473 8312 webaker 2474 8312 webaker no.notify_mask = NOTIFY_DEVICEID4_CHANGE_MASK; 2475 8312 webaker nodc.ndc_layouttype = LAYOUT4_NFSV4_1_FILES; 2476 9209 Jeff (void) memset(&nodc.ndc_deviceid, 0, sizeof (deviceid4)); 2477 8312 webaker bcopy(&ndp->nd_args.dev_id, &nodc.ndc_deviceid, 2478 8312 webaker sizeof (ndp->nd_args.dev_id)); 2479 8312 webaker 2480 8312 webaker xdr_size = xdr_sizeof(xdr_notify_deviceid_change4, &nodc); 2481 8312 webaker ASSERT(xdr_size); 2482 8312 webaker xdr_buf = kmem_alloc(xdr_size, KM_SLEEP); 2483 8312 webaker xdrmem_create(&xdr, xdr_buf, xdr_size, XDR_ENCODE); 2484 8312 webaker 2485 8312 webaker if (xdr_notify_deviceid_change4(&xdr, &nodc) == FALSE) { 2486 8312 webaker kmem_free(xdr_buf, xdr_size); 2487 8312 webaker xdr_size = 0; 2488 8312 webaker xdr_buf = NULL; 2489 8312 webaker } 2490 8312 webaker } 2491 8312 webaker 2492 8312 webaker no.notify_vals.notifylist4_len = xdr_size; 2493 8312 webaker no.notify_vals.notifylist4_val = xdr_buf; 2494 8312 webaker 2495 8312 webaker /* 2496 8312 webaker * Set up the timeout for the callback and make the actual call. 2497 8312 webaker * Timeout will be 80% of the lease period. 
2498 8312 webaker */ 2499 8312 webaker timeout.tv_sec = 2500 10448 Thomas (dbe_to_instp(sp->sn_dbe)->lease_period * 80) / 100; 2501 8312 webaker timeout.tv_usec = 0; 2502 8312 webaker 2503 10448 Thomas ch = rfs41_cb_getch(sp); 2504 8312 webaker (void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch); 2505 8312 webaker call_stat = clnt_call(ch, CB_COMPOUND, 2506 8312 webaker xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args, 2507 8312 webaker xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout); 2508 10448 Thomas rfs41_cb_freech(sp, ch); 2509 8312 webaker 2510 8312 webaker /* 2511 8312 webaker * Errors from the client are harmless for now, since this 2512 8312 webaker * is invoked by an administrative action for testing purposes. 2513 8312 webaker * In the future, if this were part of the normal server action, 2514 8312 webaker * these errors would need to be handled. 2515 8312 webaker */ 2516 8312 webaker if (call_stat != RPC_SUCCESS) { 2517 8312 webaker cmn_err(CE_NOTE, "mds_do_notify_device: RPC call failed %d", 2518 8312 webaker call_stat); 2519 8312 webaker goto done; 2520 8312 webaker 2521 8312 webaker } else if (cb4_res.status != NFS4_OK) { 2522 8312 webaker cmn_err(CE_NOTE, "mds_do_notify_device: compound failed %d", 2523 8312 webaker cb4_res.status); 2524 8312 webaker 2525 8312 webaker } 2526 8312 webaker svc_slot_cb_seqid(&cb4_res, p); 2527 8312 webaker xdr_free(xdr_CB_COMPOUND4res, (caddr_t)&cb4_res); 2528 8312 webaker done: 2529 8312 webaker kmem_free(cb4_args.tag.utf8string_val, cb4_args.tag.utf8string_len); 2530 8312 webaker kmem_free(argops, argsz); 2531 8312 webaker kmem_free(ndp, sizeof (*ndp)); 2532 8312 webaker if (xdr_buf) 2533 8312 webaker kmem_free(xdr_buf, xdr_size); 2534 10448 Thomas svc_slot_free(sp, p); 2535 8312 webaker } 2536 8312 webaker 2537 8312 webaker static void 2538 8312 webaker mds_sess_notify_device_callout(rfs4_entry_t u_entry, void *arg) 2539 8312 webaker { 2540 8312 webaker mds_notify_device_t *ndp; 2541 8312 webaker 2542 8312 webaker ndp = 
kmem_alloc(sizeof (*ndp), KM_SLEEP); 2543 8312 webaker bcopy(arg, &ndp->nd_args, sizeof (ndp->nd_args)); 2544 8312 webaker ndp->nd_sess = (mds_session_t *)u_entry; 2545 8312 webaker 2546 8312 webaker (void) thread_create(NULL, 0, mds_do_notify_device, ndp, 0, &p0, 2547 8312 webaker TS_RUN, minclsyspri); 2548 8312 webaker } 2549 8312 webaker 2550 8312 webaker void 2551 8312 webaker inst_notify_device(nfs_server_instance_t *instp, void *args) 2552 8312 webaker { 2553 8312 webaker if (instp->mds_session_tab != NULL) 2554 8312 webaker rfs4_dbe_walk(instp->mds_session_tab, 2555 8312 webaker mds_sess_notify_device_callout, args); 2556 8312 webaker } 2557 8312 webaker 2558 8312 webaker /*ARGSUSED*/ 2559 8312 webaker static int 2560 8312 webaker mds_notify_device_cmd(struct mds_notifydev_args *args, cred_t *cr) 2561 8312 webaker { 2562 8312 webaker /* 2563 8312 webaker * Walk the list of server instances, asking each 2564 8312 webaker * to notify the specified device. 2565 8312 webaker */ 2566 8312 webaker nsi_walk(inst_notify_device, args); 2567 8312 webaker return (0); 2568 6741 th199096 } 2569 6741 th199096 2570 6741 th199096 /* 2571 6741 th199096 * ----------------------------------------------- 2572 7739 jwahlig * MDS: DS_ADDR tables. 
2573 6741 th199096 * ----------------------------------------------- 2574 6741 th199096 * 2575 6741 th199096 */ 2576 6741 th199096 2577 6741 th199096 static uint32_t 2578 7811 Thomas ds_addrlist_hash(void *key) 2579 6741 th199096 { 2580 6741 th199096 return ((uint32_t)(uintptr_t)key); 2581 6741 th199096 } 2582 6741 th199096 2583 6741 th199096 static bool_t 2584 10016 Thomas ds_addrlist_compare(rfs4_entry_t u_entry, void *key) 2585 10016 Thomas { 2586 10016 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)u_entry; 2587 6741 th199096 2588 6741 th199096 return (rfs4_dbe_getid(dp->dbe) == (int)(uintptr_t)key); 2589 6741 th199096 } 2590 6741 th199096 2591 6741 th199096 static void * 2592 7811 Thomas ds_addrlist_mkkey(rfs4_entry_t entry) 2593 6741 th199096 { 2594 7811 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)entry; 2595 6741 th199096 2596 6741 th199096 return ((void *)(uintptr_t)rfs4_dbe_getid(dp->dbe)); 2597 6741 th199096 } 2598 6741 th199096 2599 6741 th199096 /*ARGSUSED*/ 2600 6741 th199096 static bool_t 2601 7811 Thomas ds_addrlist_create(rfs4_entry_t u_entry, void *arg) 2602 6741 th199096 { 2603 7811 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)u_entry; 2604 6741 th199096 struct mds_adddev_args *u_dp = (struct mds_adddev_args *)arg; 2605 6741 th199096 2606 9404 Thomas dp->dev_addr.na_r_netid = kstrdup(u_dp->dev_netid); 2607 9404 Thomas dp->dev_addr.na_r_addr = kstrdup(u_dp->dev_addr); 2608 7739 jwahlig dp->ds_owner = NULL; 2609 7740 Robert dp->dev_knc = NULL; 2610 7740 Robert dp->dev_nb = NULL; 2611 9407 Thomas dp->ds_addr_key = 0; 2612 9407 Thomas dp->ds_port_key = 0; 2613 9404 Thomas 2614 6741 th199096 return (TRUE); 2615 6741 th199096 } 2616 6741 th199096 2617 6741 th199096 /*ARGSUSED*/ 2618 6741 th199096 static void 2619 10016 Thomas ds_addrlist_destroy(rfs4_entry_t u_entry) 2620 10016 Thomas { 2621 10016 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)u_entry; 2622 9404 Thomas int i; 2623 10016 Thomas nfs_server_instance_t *instp; 2624 10016 Thomas 2625 10016 Thomas 
instp = dbe_to_instp(u_entry->dbe); 2626 10016 Thomas 2627 10016 Thomas rw_enter(&instp->ds_addrlist_lock, RW_WRITER); 2628 9404 Thomas if (dp->ds_owner != NULL) { 2629 9404 Thomas list_remove(&dp->ds_owner->ds_addrlist_list, dp); 2630 9404 Thomas rfs4_dbe_rele(dp->ds_owner->dbe); 2631 9404 Thomas dp->ds_owner = NULL; 2632 9404 Thomas } 2633 10016 Thomas rw_exit(&instp->ds_addrlist_lock); 2634 9404 Thomas 2635 9404 Thomas if (dp->dev_addr.na_r_netid) { 2636 9404 Thomas i = strlen(dp->dev_addr.na_r_netid) + 1; 2637 9404 Thomas kmem_free(dp->dev_addr.na_r_netid, i); 2638 9404 Thomas } 2639 9404 Thomas 2640 9404 Thomas if (dp->dev_addr.na_r_addr) { 2641 9404 Thomas i = strlen(dp->dev_addr.na_r_addr) + 1; 2642 9404 Thomas kmem_free(dp->dev_addr.na_r_addr, i); 2643 9404 Thomas } 2644 7740 Robert 2645 7740 Robert if (dp->dev_knc != NULL) 2646 7740 Robert kmem_free(dp->dev_knc, sizeof (struct knetconfig)); 2647 9404 Thomas 2648 7740 Robert if (dp->dev_nb != NULL) { 2649 7740 Robert if (dp->dev_nb->buf) 2650 7740 Robert kmem_free(dp->dev_nb->buf, dp->dev_nb->maxlen); 2651 7740 Robert kmem_free(dp->dev_nb, sizeof (struct netbuf)); 2652 7740 Robert } 2653 6741 th199096 } 2654 6741 th199096 2655 6741 th199096 2656 6741 th199096 /* 2657 6741 th199096 * Multipath devices. 
2658 6741 th199096 */ 2659 6741 th199096 static uint32_t 2660 6741 th199096 mds_mpd_hash(void *key) 2661 6741 th199096 { 2662 6741 th199096 return ((uint32_t)(uintptr_t)key); 2663 6741 th199096 } 2664 6741 th199096 2665 6741 th199096 static bool_t 2666 10016 Thomas mds_mpd_compare(rfs4_entry_t u_entry, void *key) 2667 10016 Thomas { 2668 10016 Thomas mds_mpd_t *mp = (mds_mpd_t *)u_entry; 2669 10016 Thomas 2670 10016 Thomas return (mp->mpd_id == (id_t)(uintptr_t)key); 2671 10016 Thomas } 2672 10016 Thomas 2673 10016 Thomas static void * 2674 10016 Thomas mds_mpd_mkkey(rfs4_entry_t u_entry) 2675 10016 Thomas { 2676 10016 Thomas mds_mpd_t *mp = (mds_mpd_t *)u_entry; 2677 10016 Thomas 2678 10016 Thomas return ((void*)(uintptr_t)mp->mpd_id); 2679 6741 th199096 } 2680 6741 th199096 2681 6741 th199096 void 2682 6741 th199096 mds_mpd_encode(nfsv4_1_file_layout_ds_addr4 *ds_dev, uint_t *len, char **val) 2683 6741 th199096 { 2684 6741 th199096 char *xdr_ds_dev; 2685 6741 th199096 int xdr_size = 0; 2686 6741 th199096 XDR xdr; 2687 6741 th199096 2688 6741 th199096 ASSERT(val); 2689 6741 th199096 2690 6741 th199096 xdr_size = xdr_sizeof(xdr_nfsv4_1_file_layout_ds_addr4, ds_dev); 2691 6741 th199096 2692 6741 th199096 ASSERT(xdr_size); 2693 6741 th199096 2694 6741 th199096 xdr_ds_dev = kmem_alloc(xdr_size, KM_SLEEP); 2695 6741 th199096 2696 6741 th199096 xdrmem_create(&xdr, xdr_ds_dev, xdr_size, XDR_ENCODE); 2697 6741 th199096 2698 6741 th199096 if (xdr_nfsv4_1_file_layout_ds_addr4(&xdr, ds_dev) == FALSE) { 2699 6741 th199096 *len = 0; 2700 6741 th199096 *val = NULL; 2701 6741 th199096 kmem_free(xdr_ds_dev, xdr_size); 2702 6741 th199096 return; 2703 6741 th199096 } 2704 6741 th199096 2705 6741 th199096 *len = xdr_size; 2706 6741 th199096 *val = xdr_ds_dev; 2707 6741 th199096 } 2708 6741 th199096 2709 6741 th199096 /*ARGSUSED*/ 2710 6741 th199096 static bool_t 2711 7739 jwahlig mds_mpd_create(rfs4_entry_t u_entry, void *arg) 2712 6741 th199096 { 2713 10016 Thomas mds_mpd_t *mp = 
(mds_mpd_t *)u_entry; 2714 6741 th199096 mds_addmpd_t *maap = (mds_addmpd_t *)arg; 2715 6741 th199096 2716 10016 Thomas mp->mpd_id = maap->id; 2717 10016 Thomas mds_mpd_encode(maap->ds_addr4, &(mp->mpd_encoded_len), 2718 10016 Thomas &(mp->mpd_encoded_val)); 2719 10016 Thomas list_create(&mp->mpd_layouts_list, sizeof (mds_layout_t), 2720 10016 Thomas offsetof(mds_layout_t, mpd_layouts_next)); 2721 10016 Thomas 2722 10016 Thomas return (TRUE); 2723 10016 Thomas } 2724 10016 Thomas 2725 10016 Thomas 2726 10016 Thomas /*ARGSUSED*/ 2727 10016 Thomas static void 2728 10016 Thomas mds_mpd_destroy(rfs4_entry_t u_entry) 2729 10016 Thomas { 2730 10016 Thomas mds_mpd_t *mp = (mds_mpd_t *)u_entry; 2731 10016 Thomas nfs_server_instance_t *instp; 2732 10016 Thomas 2733 10016 Thomas instp = dbe_to_instp(u_entry->dbe); 2734 10016 Thomas ASSERT(instp->mds_mpd_id_space != NULL); 2735 10016 Thomas id_free(instp->mds_mpd_id_space, mp->mpd_id); 2736 10016 Thomas 2737 10016 Thomas kmem_free(mp->mpd_encoded_val, mp->mpd_encoded_len); 2738 10016 Thomas 2739 10016 Thomas #ifdef DEBUG 2740 10016 Thomas /* 2741 10016 Thomas * We should never get here as the layouts 2742 10016 Thomas * entries should be holding a reference against 2743 10016 Thomas * this mpd! 2744 10016 Thomas */ 2745 10016 Thomas rw_enter(&instp->mds_mpd_lock, RW_WRITER); 2746 10016 Thomas ASSERT(list_is_empty(&mp->mpd_layouts_list)); 2747 10016 Thomas rw_exit(&instp->mds_mpd_lock); 2748 10016 Thomas #endif 2749 10016 Thomas list_destroy(&mp->mpd_layouts_list); 2750 6741 th199096 } 2751 6741 th199096 2752 6741 th199096 /* 2753 6741 th199096 * The OTW device id is 128bits in length, we however are 2754 6741 th199096 * still using a uint_32 internally. 
2755 6741 th199096 */ 2756 6741 th199096 mds_mpd_t * 2757 10016 Thomas mds_find_mpd(nfs_server_instance_t *instp, id_t id) 2758 10016 Thomas { 2759 10016 Thomas mds_mpd_t *mp; 2760 6741 th199096 bool_t create = FALSE; 2761 6741 th199096 2762 10016 Thomas mp = (mds_mpd_t *)rfs4_dbsearch(instp->mds_mpd_idx, 2763 6741 th199096 (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID); 2764 10016 Thomas return (mp); 2765 10016 Thomas } 2766 10016 Thomas 2767 10016 Thomas /* 2768 10016 Thomas * Plop kernel deviceid into the 128bit OTW deviceid 2769 10016 Thomas */ 2770 10016 Thomas void 2771 10016 Thomas mds_set_deviceid(id_t did, deviceid4 *otw_id) 2772 6741 th199096 { 2773 6741 th199096 ba_devid_t d; 2774 6741 th199096 2775 6741 th199096 bzero(&d, sizeof (d)); 2776 6741 th199096 d.i.did = did; 2777 6741 th199096 bcopy(&d, otw_id, sizeof (d)); 2778 6741 th199096 } 2779 6741 th199096 2780 6741 th199096 /* 2781 7739 jwahlig * Used by the walker to populate the deviceid list. 2782 6741 th199096 */ 2783 6741 th199096 void 2784 6741 th199096 mds_mpd_list(rfs4_entry_t entry, void *arg) 2785 6741 th199096 { 2786 10016 Thomas mds_mpd_t *mp = (mds_mpd_t *)entry; 2787 10016 Thomas mds_device_list_t *mdl = (mds_device_list_t *)arg; 2788 6741 th199096 2789 6741 th199096 deviceid4 *dlip; 2790 6741 th199096 2791 6741 th199096 /* 2792 6741 th199096 * If this entry is invalid or we should skip it 2793 6741 th199096 * go to the next one.. 
2794 6741 th199096 */ 2795 10447 Thomas if (rfs4_dbe_skip_or_invalid(mp->mpd_dbe)) 2796 10447 Thomas return; 2797 10447 Thomas 2798 10447 Thomas dlip = &(mdl->mdl_dl[mdl->mdl_count]); 2799 6741 th199096 2800 10016 Thomas mds_set_deviceid(mp->mpd_id, dlip); 2801 6741 th199096 2802 6741 th199096 /* 2803 6741 th199096 * bump to the next devlist_item4 2804 6741 th199096 */ 2805 10447 Thomas mdl->mdl_count++; 2806 6741 th199096 } 2807 6741 th199096 2808 10016 Thomas /* ARGSUSED */ 2809 7811 Thomas ds_addrlist_t * 2810 10016 Thomas mds_find_ds_addrlist_by_mds_sid(nfs_server_instance_t *instp, 2811 10016 Thomas mds_sid *sid) 2812 10016 Thomas { 2813 10016 Thomas ds_addrlist_t *dp = NULL; 2814 10016 Thomas ds_guid_info_t *pgi; 2815 10016 Thomas ds_owner_t *dop; 2816 10016 Thomas ds_guid_t guid; 2817 10016 Thomas 2818 10016 Thomas /* 2819 10016 Thomas * Warning, do not, do not ever, free this guid! 2820 10016 Thomas */ 2821 10016 Thomas guid.stor_type = ZFS; 2822 10016 Thomas guid.ds_guid_u.zfsguid.zfsguid_len = sid->len; 2823 10016 Thomas guid.ds_guid_u.zfsguid.zfsguid_val = sid->val; 2824 10016 Thomas 2825 10016 Thomas /* 2826 10016 Thomas * First we need to find the ds_guid_info_t which 2827 10016 Thomas * corresponds to this mds_sid. 2828 10016 Thomas */ 2829 10016 Thomas pgi = mds_find_ds_guid_info_by_id(&guid); 2830 10016 Thomas if (pgi == NULL) 2831 10016 Thomas return (NULL); 2832 10016 Thomas 2833 10016 Thomas dop = pgi->ds_owner; 2834 10016 Thomas if (!dop) 2835 10016 Thomas goto error; 2836 10016 Thomas 2837 10016 Thomas /* 2838 10016 Thomas * XXX: If a ds_owner has multiple addresses, then just grab the first 2839 10016 Thomas * we find. 
2840 10016 Thomas */ 2841 10016 Thomas dp = list_head(&dop->ds_addrlist_list); 2842 10016 Thomas if (dp) 2843 10016 Thomas rfs4_dbe_hold(dp->dbe); 2844 10016 Thomas 2845 10016 Thomas error: 2846 10016 Thomas 2847 10016 Thomas rfs4_dbe_rele(pgi->dbe); 2848 6741 th199096 return (dp); 2849 6741 th199096 } 2850 6741 th199096 2851 7811 Thomas ds_addrlist_t * 2852 7811 Thomas mds_find_ds_addrlist(nfs_server_instance_t *instp, uint32_t id) 2853 6741 th199096 { 2854 7811 Thomas ds_addrlist_t *dp; 2855 6741 th199096 bool_t create = FALSE; 2856 6741 th199096 2857 7811 Thomas dp = (ds_addrlist_t *)rfs4_dbsearch(instp->ds_addrlist_idx, 2858 6741 th199096 (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID); 2859 6741 th199096 return (dp); 2860 6741 th199096 } 2861 6741 th199096 2862 9404 Thomas void 2863 9404 Thomas mds_ds_addrlist_rele(ds_addrlist_t *dp) 2864 9404 Thomas { 2865 9404 Thomas rfs4_dbe_rele(dp->dbe); 2866 9404 Thomas } 2867 6741 th199096 2868 6741 th199096 /* 2869 6741 th199096 */ 2870 6741 th199096 static uint32_t 2871 6741 th199096 mds_str_hash(void *key) 2872 6741 th199096 { 2873 6741 th199096 char *addr = (char *)key; 2874 6741 th199096 int i; 2875 6741 th199096 uint32_t hash = 0; 2876 6741 th199096 2877 6741 th199096 for (i = 0; addr[i]; i++) { 2878 6741 th199096 hash <<= 1; 2879 6741 th199096 hash += (uint_t)addr[i]; 2880 6741 th199096 } 2881 6741 th199096 2882 6741 th199096 return (hash); 2883 6741 th199096 } 2884 6741 th199096 2885 10016 Thomas static uint32_t 2886 10016 Thomas mds_utf8string_hash(void *key) 2887 10016 Thomas { 2888 10016 Thomas utf8string *obj = (utf8string *)key; 2889 10016 Thomas int i; 2890 10016 Thomas uint32_t hash = 0; 2891 10016 Thomas 2892 10016 Thomas for (i = 0; i < obj->utf8string_len; i++) { 2893 10016 Thomas hash <<= 1; 2894 10016 Thomas hash += (uint_t)obj->utf8string_val[i]; 2895 10016 Thomas } 2896 10016 Thomas 2897 10016 Thomas return (hash); 2898 10016 Thomas } 2899 10016 Thomas 2900 10016 Thomas static bool_t 2901 
10016 Thomas rfs41_invalid_expiry(rfs4_entry_t entry) 2902 9404 Thomas { 2903 9404 Thomas if (rfs4_dbe_is_invalid(entry->dbe)) 2904 9404 Thomas return (TRUE); 2905 9404 Thomas 2906 9404 Thomas return (FALSE); 2907 9404 Thomas } 2908 6741 th199096 2909 10016 Thomas static uint32_t 2910 10016 Thomas ds_addrlist_addrkey_hash(void *key) 2911 10016 Thomas { 2912 10016 Thomas return ((uint32_t)(uintptr_t)key); 2913 10016 Thomas } 2914 10016 Thomas 2915 10016 Thomas static void * 2916 10016 Thomas ds_addrlist_addrkey_mkkey(rfs4_entry_t entry) 2917 6741 th199096 { 2918 7811 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)entry; 2919 6741 th199096 2920 10016 Thomas return (&dp->ds_addr_key); 2921 10016 Thomas } 2922 10016 Thomas 2923 10016 Thomas /* 2924 10016 Thomas * Only compare the address portion and not the 2925 10016 Thomas * port info. We do this because the DS may 2926 10016 Thomas * have rebooted and gotten a different port 2927 10016 Thomas * number. 2928 10016 Thomas * 2929 10016 Thomas * XXX: What happens if we have multiple DSes 2930 10016 Thomas * on one box? I.e., a valid case for the same 2931 10016 Thomas * IP, but different ports? 2932 10016 Thomas */ 2933 6741 th199096 static int 2934 10016 Thomas ds_addrlist_addrkey_compare(rfs4_entry_t entry, void *key) 2935 6741 th199096 { 2936 7811 Thomas ds_addrlist_t *dp = (ds_addrlist_t *)entry; 2937 10016 Thomas uint64_t addr_key = *(uint64_t *)key; 2938 10016 Thomas 2939 10016 Thomas return (addr_key == dp->ds_addr_key); 2940 10016 Thomas } 2941 6741 th199096 2942 6741 th199096 /* 2943 7739 jwahlig * Data-server information (ds_owner) tables and indexes. 
2944 6741 th199096 */ 2945 6741 th199096 static uint32_t 2946 7739 jwahlig ds_owner_hash(void *key) 2947 6741 th199096 { 2948 6741 th199096 return ((uint32_t)(uintptr_t)key); 2949 6741 th199096 } 2950 6741 th199096 2951 6741 th199096 static bool_t 2952 7739 jwahlig ds_owner_compare(rfs4_entry_t entry, void *key) 2953 6741 th199096 { 2954 7739 jwahlig ds_owner_t *dop = (ds_owner_t *)entry; 2955 6741 th199096 2956 7739 jwahlig return (dop->ds_id == (int)(uintptr_t)key); 2957 6741 th199096 2958 6741 th199096 } 2959 6741 th199096 2960 6741 th199096 static void * 2961 7739 jwahlig ds_owner_mkkey(rfs4_entry_t entry) 2962 6741 th199096 { 2963 7739 jwahlig ds_owner_t *dop = (ds_owner_t *)entry; 2964 6741 th199096 2965 7739 jwahlig return ((void *)(uintptr_t)dop->ds_id); 2966 6741 th199096 } 2967 6741 th199096 2968 6741 th199096 static bool_t 2969 7739 jwahlig ds_owner_inst_compare(rfs4_entry_t entry, void *key) 2970 6741 th199096 { 2971 7739 jwahlig ds_owner_t *dop = (ds_owner_t *)entry; 2972 6741 th199096 2973 7739 jwahlig return (strcmp(dop->identity, key) == 0); 2974 6741 th199096 } 2975 6741 th199096 2976 6741 th199096 static void * 2977 7739 jwahlig ds_owner_inst_mkkey(rfs4_entry_t entry) 2978 6741 th199096 { 2979 7739 jwahlig ds_owner_t *dop = (ds_owner_t *)entry; 2980 7739 jwahlig return (dop->identity); 2981 6741 th199096 } 2982 6741 th199096 2983 6741 th199096 /*ARGSUSED*/ 2984 6741 th199096 static bool_t