Home | History | Annotate | Download | only in nfs
      1   6741  th199096 /*
      2   6741  th199096  * CDDL HEADER START
      3   6741  th199096  *
      4   6741  th199096  * The contents of this file are subject to the terms of the
      5   6741  th199096  * Common Development and Distribution License (the "License").
      6   6741  th199096  * You may not use this file except in compliance with the License.
      7   6741  th199096  *
      8   6741  th199096  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   6741  th199096  * or http://www.opensolaris.org/os/licensing.
     10   6741  th199096  * See the License for the specific language governing permissions
     11   6741  th199096  * and limitations under the License.
     12   6741  th199096  *
     13   6741  th199096  * When distributing Covered Code, include this CDDL HEADER in each
     14   6741  th199096  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   6741  th199096  * If applicable, add the following below this CDDL HEADER, with the
     16   6741  th199096  * fields enclosed by brackets "[]" replaced with your own identifying
     17   6741  th199096  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   6741  th199096  *
     19   6741  th199096  * CDDL HEADER END
     20   6741  th199096  */
     21   6741  th199096 /*
     22   8422     James  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23   6741  th199096  * Use is subject to license terms.
     24   6741  th199096  */
     25   6741  th199096 
     26   6741  th199096 #include <sys/flock.h>
     27   6741  th199096 #include <nfs/export.h>
     28   6741  th199096 #include <sys/cmn_err.h>
     29   6741  th199096 #include <sys/atomic.h>
     30   6741  th199096 #include <nfs/nfs.h>
     31   6741  th199096 #include <nfs/nfs4.h>
     32   6741  th199096 #include <nfs/nfssys.h>
     33   6741  th199096 #include <nfs/lm.h>
     34   6741  th199096 #include <sys/pathname.h>
     35   6741  th199096 #include <sys/sdt.h>
     36   6741  th199096 #include <sys/nvpair.h>
     37   6741  th199096 #include <sys/sdt.h>
     38   6741  th199096 #include <sys/disp.h>
     39  10016    Thomas #include <sys/id_space.h>
     40   6741  th199096 
     41   6741  th199096 extern u_longlong_t nfs4_srv_caller_id;
     42   6741  th199096 
     43   7739   jwahlig #include <nfs/nfs_sstor_impl.h>
     44   6741  th199096 #include <nfs/mds_state.h>
     45   6741  th199096 #include <nfs/nfs41_sessions.h>
     46   6741  th199096 
     47   6741  th199096 #include <nfs/nfs41_filehandle.h>
     48   6741  th199096 
     49  10016    Thomas #include <nfs/spe_impl.h>
     50  10016    Thomas 
     51   6741  th199096 static void mds_do_lorecall(mds_lorec_t *);
     52   6741  th199096 static int  mds_lorecall_cmd(struct mds_reclo_args *, cred_t *);
     53   8312   webaker static int  mds_notify_device_cmd(struct mds_notifydev_args *, cred_t *);
     54   6741  th199096 
     55   7739   jwahlig extern void mds_do_cb_recall(struct rfs4_deleg_state *, bool_t);
     56   6741  th199096 
     57   6741  th199096 /*
     58   6741  th199096  * XXX - slrc_slot_size will more than likely have to be
     59   6741  th199096  *	 computed dynamically as the server adjusts the
     60   6741  th199096  *	 sessions' slot replay cache size. This should be
     61   6741  th199096  *	 good for proto.
     62   6741  th199096  */
     63   6741  th199096 slotid4 slrc_slot_size = MAXSLOTS;
     64  10475      rick slotid4	bc_slot_tab = 0;	/* backchan slots are set by client */
     65   6741  th199096 
     66   6741  th199096 /* The values below are rfs4_lease_time units */
     67   6741  th199096 
     68   6741  th199096 #ifdef DEBUG
     69   6741  th199096 #define	SESSION_CACHE_TIME 1
     70   6741  th199096 #else
     71   6741  th199096 #define	SESSION_CACHE_TIME 10
     72   6741  th199096 #endif
     73   6741  th199096 
     74   6741  th199096 #define	ONES_64	(0xFFFFFFFFFFFFFFFFuLL)
     75   6741  th199096 
     76   6741  th199096 /* Sessions */
     77   6741  th199096 static void mds_session_destroy(rfs4_entry_t);
     78   6741  th199096 static bool_t mds_session_expiry(rfs4_entry_t);
     79   7739   jwahlig static bool_t mds_session_create(rfs4_entry_t, void *);
     80   6741  th199096 static uint32_t sessid_hash(void *);
     81   6741  th199096 static bool_t sessid_compare(rfs4_entry_t, void *);
     82   6741  th199096 static void *sessid_mkkey(rfs4_entry_t);
     83   6741  th199096 
     84   8312   webaker /* function pointers for mdsadm */
     85   8312   webaker 
     86   6741  th199096 extern int (*mds_recall_lo)(struct mds_reclo_args *, cred_t *);
     87   8312   webaker extern int (*mds_notify_device)(struct mds_notifydev_args *, cred_t *);
     88   6741  th199096 
     89   6741  th199096 extern char *kstrdup(const char *);
     90   6741  th199096 
     91   6741  th199096 extern rfs4_client_t *findclient(nfs_server_instance_t *, nfs_client_id4 *,
     92   6741  th199096     bool_t *, rfs4_client_t *);
     93   6741  th199096 
     94   6741  th199096 extern rfs4_client_t *findclient_by_id(nfs_server_instance_t *, clientid4);
     95   6741  th199096 
     96   6741  th199096 extern rfs4_openowner_t *findopenowner(nfs_server_instance_t *, open_owner4 *,
     97   6741  th199096     bool_t *, seqid4);
     98   6741  th199096 
     99   7739   jwahlig extern void v4prot_sstor_init(nfs_server_instance_t *);
    100   7739   jwahlig 
    101   7739   jwahlig extern void rfs4_ss_retrieve_state(nfs_server_instance_t *);
    102   7739   jwahlig extern int nfs_doorfd;
    103   6741  th199096 
    104   6741  th199096 #ifdef DEBUG
    105   6741  th199096 #define	MDS_TABSIZE 17
    106   6741  th199096 #else
    107   6741  th199096 #define	MDS_TABSIZE 2047
    108   6741  th199096 #endif
    109   6741  th199096 
    110   6741  th199096 #define	MDS_MAXTABSZ 1024*1024
    111   6741  th199096 
    112   6741  th199096 extern uint32_t clientid_hash(void *);
    113   7739   jwahlig 
    114   7739   jwahlig /*
    115   7739   jwahlig  * Returns the instances capabilities flag word
    116   7739   jwahlig  * the form of:
    117   7739   jwahlig  *
    118   7739   jwahlig  *  EXCHGID4_FLAG_USE_NON_PNFS
    119   7739   jwahlig  *  EXCHGID4_FLAG_USE_PNFS_MDS
    120   7739   jwahlig  *  EXCHGID4_FLAG_USE_PNFS_DS
    121   7739   jwahlig  *
    122   7739   jwahlig  */
    123   7739   jwahlig uint32_t
    124   7739   jwahlig mds_get_capabilities(nfs_server_instance_t *instp)
    125   7739   jwahlig {
    126   7739   jwahlig 	uint32_t my_abilities = 0;
    127   7739   jwahlig 
    128   7739   jwahlig 	if (instp)
    129   7739   jwahlig 		my_abilities =
    130   7739   jwahlig 		    instp->inst_flags & EXCHGID4_FLAG_MASK_PNFS;
    131   7739   jwahlig 	return (my_abilities);
    132   7739   jwahlig }
    133   7739   jwahlig 
    134   6741  th199096 
    135   6741  th199096 /*ARGSUSED*/
    136   6741  th199096 static bool_t
    137   6741  th199096 mds_do_not_expire(rfs4_entry_t u_entry)
    138   6741  th199096 {
    139   6741  th199096 	return (FALSE);
    140   6741  th199096 }
    141   6741  th199096 
    142   6741  th199096 /*ARGSUSED*/
    143   6741  th199096 static stateid_t
    144   7739   jwahlig mds_create_stateid(rfs4_dbe_t *dbe, stateid_type_t id_type)
    145   6741  th199096 {
    146   6741  th199096 	stateid_t id;
    147   6741  th199096 
    148   7739   jwahlig 	id.v41_bits.boottime = dbe_to_instp(dbe)->start_time;
    149   6741  th199096 	id.v41_bits.state_ident = rfs4_dbe_getid(dbe);
    150   8422     James 	id.v41_bits.chgseq = 0;
    151   6741  th199096 	id.v41_bits.type = id_type;
    152   6741  th199096 	id.v41_bits.pid = 0;
    153   6741  th199096 
    154   6741  th199096 	return (id);
    155   6741  th199096 }
    156   6741  th199096 
    157   6741  th199096 
    158   6741  th199096 rfs4_openowner_t *
    159   7739   jwahlig mds_findopenowner(nfs_server_instance_t *instp, open_owner4 *openowner,
    160   7739   jwahlig     bool_t *create)
    161   6741  th199096 {
    162  10447    Thomas 	rfs4_openowner_t *oo;
    163   6741  th199096 	rfs4_openowner_t arg;
    164   6741  th199096 
    165  10447    Thomas 	arg.ro_owner = *openowner;
    166  10447    Thomas 	arg.ro_open_seqid = 0;
    167  10447    Thomas 	oo = (rfs4_openowner_t *)rfs4_dbsearch(instp->openowner_idx,
    168   6741  th199096 	    openowner, create, &arg, RFS4_DBS_VALID);
    169  10447    Thomas 	return (oo);
    170   6741  th199096 }
    171   6741  th199096 
    172   6741  th199096 rfs4_lo_state_t *
    173   6741  th199096 mds_findlo_state_by_owner(rfs4_lockowner_t *lo,
    174   6741  th199096 			rfs4_state_t *sp, bool_t *create)
    175   6741  th199096 {
    176   6741  th199096 	rfs4_lo_state_t *lsp;
    177   6741  th199096 	rfs4_lo_state_t arg;
    178   7739   jwahlig 	nfs_server_instance_t *instp;
    179   6741  th199096 
    180  10447    Thomas 	arg.rls_locker = lo;
    181  10447    Thomas 	arg.rls_state = sp;
    182  10447    Thomas 
    183  10447    Thomas 	instp = dbe_to_instp(lo->rl_dbe);
    184   7739   jwahlig 
    185   7739   jwahlig 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(instp->lo_state_owner_idx,
    186   6741  th199096 	    &arg, create, &arg, RFS4_DBS_VALID);
    187   6741  th199096 
    188   6741  th199096 	return (lsp);
    189   6741  th199096 }
    190   6741  th199096 
    191  10447    Thomas /* XXX: well clearly this needs to be cleaned up.. */
    192   6741  th199096 typedef union {
    193   6741  th199096 	struct {
    194   6741  th199096 		uint32_t start_time;
    195   6741  th199096 		uint32_t c_id;
    196   6741  th199096 	} impl_id;
    197   6741  th199096 	clientid4 id4;
    198   6741  th199096 } cid;
    199   6741  th199096 
    200   6741  th199096 int
    201   6741  th199096 mds_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
    202   6741  th199096 {
    203   6741  th199096 	stateid_t *id = (stateid_t *)stateid;
    204   6741  th199096 
    205  10447    Thomas 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
    206   6741  th199096 		return (NFS4_CHECK_STATEID_EXPIRED);
    207   6741  th199096 
    208   6741  th199096 	/* Stateid is some time in the future - that's bad */
    209  10447    Thomas 	if (sp->rs_stateid.v41_bits.chgseq < id->v41_bits.chgseq)
    210   6741  th199096 		return (NFS4_CHECK_STATEID_BAD);
    211   6741  th199096 
    212  10447    Thomas 	if (sp->rs_closed == TRUE)
    213   6741  th199096 		return (NFS4_CHECK_STATEID_CLOSED);
    214   6741  th199096 
    215   6741  th199096 	return (NFS4_CHECK_STATEID_OKAY);
    216   6741  th199096 }
    217   6741  th199096 
    218   6741  th199096 int
    219   6741  th199096 mds_fh_is_exi(struct exportinfo *exi, nfs41_fh_fmt_t *fhp)
    220   6741  th199096 {
    221   6741  th199096 	if (exi->exi_fid.fid_len != fhp->fh.v1.export_fid.len)
    222   6741  th199096 		return (0);
    223   6741  th199096 
    224   6741  th199096 	if (bcmp(exi->exi_fid.fid_data, fhp->fh.v1.export_fid.val,
    225   6741  th199096 	    fhp->fh.v1.export_fid.len) != 0)
    226   6741  th199096 		return (0);
    227   6741  th199096 
    228   6741  th199096 	if (exi->exi_fsid.val[0] != fhp->fh.v1.export_fsid.val[0] ||
    229   6741  th199096 	    exi->exi_fsid.val[1] != fhp->fh.v1.export_fsid.val[1])
    230   6741  th199096 		return (0);
    231   6741  th199096 
    232   6741  th199096 	return (1);
    233   6741  th199096 }
    234   6741  th199096 
    235   6741  th199096 /*
    236   6741  th199096  * This function is used as a target for the rfs4_dbe_walk() call
    237   6741  th199096  * below.  The purpose of this function is to see if the
    238   6741  th199096  * lockowner_state refers to a file that resides within the exportinfo
    239   6741  th199096  * export.  If so, then remove the lock_owner state (file locks and
    240   6741  th199096  * share "locks") for this object since the intent is the server is
    241   6741  th199096  * unexporting the specified directory.  Be sure to invalidate the
    242   6741  th199096  * object after the state has been released
    243   6741  th199096  */
    244   6741  th199096 void
    245   6741  th199096 mds_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
    246   6741  th199096 {
    247   6741  th199096 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
    248   6741  th199096 	struct exportinfo *exi = (struct exportinfo *)e;
    249   6741  th199096 	nfs41_fh_fmt_t   *fhp;
    250   6741  th199096 
    251  10447    Thomas 	fhp = (nfs41_fh_fmt_t *)
    252  10447    Thomas 	    lsp->rls_state->rs_finfo->rf_filehandle.nfs_fh4_val;
    253   6741  th199096 
    254   6741  th199096 	if (mds_fh_is_exi(exi, fhp)) {
    255  10447    Thomas 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
    256  10447    Thomas 		rfs4_dbe_invalidate(lsp->rls_dbe);
    257  10447    Thomas 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
    258   6741  th199096 	}
    259   6741  th199096 }
    260   6741  th199096 
    261   6741  th199096 /*
    262   6741  th199096  * This function is used as a target for the rfs4_dbe_walk() call
    263   6741  th199096  * below.  The purpose of this function is to see if the state refers
    264   6741  th199096  * to a file that resides within the exportinfo export.  If so, then
    265   6741  th199096  * remove the open state for this object since the intent is the
    266   6741  th199096  * server is unexporting the specified directory.  The main result for
    267   6741  th199096  * this type of entry is to invalidate it such it will not be found in
    268   6741  th199096  * the future.
    269   6741  th199096  */
    270   6741  th199096 void
    271   6741  th199096 mds_state_walk_callout(rfs4_entry_t u_entry, void *e)
    272   6741  th199096 {
    273   6741  th199096 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
    274   6741  th199096 	struct exportinfo *exi = (struct exportinfo *)e;
    275   6741  th199096 	nfs41_fh_fmt_t   *fhp;
    276   6741  th199096 
    277   6741  th199096 	fhp =
    278  10447    Thomas 	    (nfs41_fh_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
    279   6741  th199096 
    280   6741  th199096 	if (mds_fh_is_exi(exi, fhp)) {
    281   6741  th199096 		rfs4_state_close(sp, TRUE, FALSE, CRED());
    282  10447    Thomas 		rfs4_dbe_invalidate(sp->rs_dbe);
    283   6741  th199096 	}
    284   6741  th199096 }
    285   6741  th199096 
    286   6741  th199096 /*
    287   6741  th199096  * This function is used as a target for the rfs4_dbe_walk() call
    288   6741  th199096  * below.  The purpose of this function is to see if the state refers
    289   6741  th199096  * to a file that resides within the exportinfo export.  If so, then
    290   6741  th199096  * remove the deleg state for this object since the intent is the
    291   6741  th199096  * server is unexporting the specified directory.  The main result for
    292   6741  th199096  * this type of entry is to invalidate it such it will not be found in
    293   6741  th199096  * the future.
    294   6741  th199096  */
    295   6741  th199096 void
    296   6741  th199096 mds_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
    297   6741  th199096 {
    298   6741  th199096 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
    299   6741  th199096 	struct exportinfo *exi = (struct exportinfo *)e;
    300   6741  th199096 	nfs41_fh_fmt_t   *fhp;
    301   6741  th199096 
    302   6741  th199096 	fhp =
    303  10447    Thomas 	    (nfs41_fh_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
    304   6741  th199096 
    305   6741  th199096 	if (mds_fh_is_exi(exi, fhp)) {
    306  10447    Thomas 		rfs4_dbe_invalidate(dsp->rds_dbe);
    307   6741  th199096 	}
    308   6741  th199096 }
    309   6741  th199096 
    310   6741  th199096 /*
    311   6741  th199096  * This function is used as a target for the rfs4_dbe_walk() call
    312   6741  th199096  * below.  The purpose of this function is to see if the state refers
    313   6741  th199096  * to a file that resides within the exportinfo export.  If so, then
    314   6741  th199096  * release vnode hold for this object since the intent is the server
    315   6741  th199096  * is unexporting the specified directory.  Invalidation will prevent
    316   6741  th199096  * this struct from being found in the future.
    317   6741  th199096  */
    318   6741  th199096 void
    319   6741  th199096 mds_file_walk_callout(rfs4_entry_t u_entry, void *e)
    320   6741  th199096 {
    321   6741  th199096 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
    322   6741  th199096 	struct exportinfo *exi = (struct exportinfo *)e;
    323   6741  th199096 	nfs41_fh_fmt_t   *fhp;
    324   6741  th199096 	vnode_t *vp;
    325   7739   jwahlig 	nfs_server_instance_t *instp;
    326   6741  th199096 
    327  10447    Thomas 	fhp = (nfs41_fh_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
    328   6741  th199096 
    329   6741  th199096 	if (mds_fh_is_exi(exi, fhp) == 0)
    330   6741  th199096 		return;
    331   6741  th199096 
    332  10447    Thomas 	if ((vp = fp->rf_vp) != NULL) {
    333  10447    Thomas 		instp = dbe_to_instp(fp->rf_dbe);
    334   7739   jwahlig 		ASSERT(instp);
    335   9404    Thomas 
    336   6741  th199096 		/*
    337   6741  th199096 		 * don't leak monitors and remove the reference
    338   6741  th199096 		 * put on the vnode when the delegation was granted.
    339   6741  th199096 		 */
    340  10447    Thomas 		if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_READ) {
    341   7739   jwahlig 			(void) fem_uninstall(vp, instp->deleg_rdops,
    342   6741  th199096 			    (void *)fp);
    343   6741  th199096 			vn_open_downgrade(vp, FREAD);
    344  10447    Thomas 		} else if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) {
    345   7739   jwahlig 			(void) fem_uninstall(vp, instp->deleg_wrops,
    346   6741  th199096 			    (void *)fp);
    347   6741  th199096 			vn_open_downgrade(vp, FREAD|FWRITE);
    348   6741  th199096 		}
    349   9404    Thomas 
    350   6741  th199096 		mutex_enter(&vp->v_lock);
    351   7739   jwahlig 		(void) vsd_set(vp, instp->vkey, NULL);
    352   6741  th199096 		mutex_exit(&vp->v_lock);
    353   6741  th199096 		VN_RELE(vp);
    354  10447    Thomas 		fp->rf_vp = NULL;
    355  10447    Thomas 	}
    356  10447    Thomas 
    357  10447    Thomas 	rfs4_dbe_invalidate(fp->rf_dbe);
    358   6741  th199096 }
    359   6741  th199096 
    360   6741  th199096 /*
    361   6741  th199096  * --------------------------------------------------------
    362   6741  th199096  * MDS - NFSv4.1  Sessions
    363   6741  th199096  * --------------------------------------------------------
    364   6741  th199096  */
    365   6741  th199096 static uint32_t
    366   6741  th199096 sessid_hash(void *key)
    367   6741  th199096 {
    368   6741  th199096 	sid *idp = key;
    369   6741  th199096 
    370   6741  th199096 	return (idp->impl_id.s_id);
    371   6741  th199096 }
    372   6741  th199096 
    373   6741  th199096 static bool_t
    374   6741  th199096 sessid_compare(rfs4_entry_t entry, void *key)
    375   6741  th199096 {
    376  10448    Thomas 	mds_session_t	*sp = (mds_session_t *)entry;
    377   6741  th199096 	sessionid4	*idp = (sessionid4 *)key;
    378   6741  th199096 
    379  10448    Thomas 	return (bcmp(idp, &sp->sn_sessid, sizeof (sessionid4)) == 0);
    380   6741  th199096 }
    381   6741  th199096 
    382   6741  th199096 static void *
    383   6741  th199096 sessid_mkkey(rfs4_entry_t entry)
    384   6741  th199096 {
    385  10448    Thomas 	mds_session_t *sp = (mds_session_t *)entry;
    386  10448    Thomas 
    387  10448    Thomas 	return (&sp->sn_sessid);
    388   6741  th199096 }
    389   6741  th199096 
    390   6741  th199096 static bool_t
    391   6741  th199096 sess_clid_compare(rfs4_entry_t entry, void *key)
    392   6741  th199096 {
    393  10448    Thomas 	mds_session_t *sp = (mds_session_t *)entry;
    394   6741  th199096 	clientid4 *idp = key;
    395   6741  th199096 
    396  10448    Thomas 	return (*idp == sp->sn_clnt->rc_clientid);
    397   6741  th199096 }
    398   6741  th199096 
    399   6741  th199096 static void *
    400   6741  th199096 sess_clid_mkkey(rfs4_entry_t entry)
    401   6741  th199096 {
    402  10447    Thomas 	return (&(((mds_session_t *)entry)->sn_clnt->rc_clientid));
    403  10447    Thomas }
    404  10447    Thomas 
    405  10447    Thomas void
    406  10448    Thomas rfs41_session_rele(mds_session_t *sp)
    407  10448    Thomas {
    408  10448    Thomas 	rfs4_dbe_rele(sp->sn_dbe);
    409   6741  th199096 }
    410   6741  th199096 
    411   6741  th199096 mds_session_t *
    412   7739   jwahlig mds_findsession_by_id(nfs_server_instance_t *instp, sessionid4 sessid)
    413   6741  th199096 {
    414  10448    Thomas 	mds_session_t	*sp;
    415   7739   jwahlig 	rfs4_index_t	*idx = instp->mds_session_idx;
    416   6741  th199096 	bool_t		 create = FALSE;
    417   6741  th199096 
    418   7739   jwahlig 	rw_enter(&instp->findsession_lock, RW_READER);
    419  10448    Thomas 	sp = (mds_session_t *)rfs4_dbsearch(idx, sessid, &create, NULL,
    420   6741  th199096 	    RFS4_DBS_VALID);
    421   7739   jwahlig 	rw_exit(&instp->findsession_lock);
    422   6741  th199096 
    423  10448    Thomas 	return (sp);
    424   6741  th199096 }
    425   6741  th199096 
    426   6741  th199096 mds_session_t *
    427   7739   jwahlig mds_findsession_by_clid(nfs_server_instance_t *instp, clientid4 clid)
    428   6741  th199096 {
    429  10448    Thomas 	mds_session_t	*sp;
    430   6741  th199096 	bool_t		 create = FALSE;
    431   6741  th199096 
    432   7739   jwahlig 	rw_enter(&instp->findsession_lock, RW_READER);
    433  10448    Thomas 	sp = (mds_session_t *)rfs4_dbsearch(instp->mds_sess_clientid_idx, &clid,
    434   6741  th199096 	    &create, NULL, RFS4_DBS_VALID);
    435   7739   jwahlig 	rw_exit(&instp->findsession_lock);
    436   6741  th199096 
    437  10448    Thomas 	return (sp);
    438   6741  th199096 }
    439   6741  th199096 
    440   6741  th199096 /*
    441   6741  th199096  * A clientid can have multiple sessions associated with it. Hence,
    442   6741  th199096  * performing a raw 'mds_findsession' (even for a create) might
    443   6741  th199096  * yield a list of sessions associated with the clientid in question.
    444   6741  th199096  * Instead of delving deep into the rfs4_dbsearch engine to correct
    445   6741  th199096  * this now, we'll call our function directly and create an association
    446   6741  th199096  * between the session table and both primary (sessionid) index and
    447   6741  th199096  * secondary (clientid) index for the newly created session.
    448   6741  th199096  */
    449   6741  th199096 mds_session_t	*
    450   7739   jwahlig mds_createsession(nfs_server_instance_t *instp, session41_create_t *ap)
    451   6741  th199096 {
    452  10448    Thomas 	mds_session_t	*sp = NULL;
    453   7739   jwahlig 	rfs4_index_t	*idx = instp->mds_session_idx;
    454   6741  th199096 
    455   7739   jwahlig 	rw_enter(&instp->findsession_lock, RW_WRITER);
    456  10448    Thomas 	if ((sp = (mds_session_t *)rfs4_dbcreate(idx, (void *)ap)) == NULL) {
    457   6741  th199096 		DTRACE_PROBE1(mds__srv__createsession__fail,
    458   6741  th199096 		    session41_create_t *, ap);
    459   6741  th199096 	}
    460   7739   jwahlig 	rw_exit(&instp->findsession_lock);
    461  10448    Thomas 	return (sp);
    462   6741  th199096 }
    463   6741  th199096 
    464   6741  th199096 /*
    465   7397      rick  * mds_session_inval invalidates the session so other
    466   7397      rick  * threads won't "find" the session to place additional
    467   7397      rick  * callbacks. Destroy session even if no backchannel has
    468   7397      rick  * been established.
    469   6741  th199096  */
    470   7397      rick nfsstat4
    471  10448    Thomas mds_session_inval(mds_session_t	*sp)
    472   6741  th199096 {
    473   7397      rick 	nfsstat4	status;
    474   6741  th199096 
    475  10448    Thomas 	ASSERT(sp != NULL);
    476  10448    Thomas 	ASSERT(rfs4_dbe_islocked(sp->sn_dbe));
    477  10448    Thomas 
    478  10448    Thomas 	if (SN_CB_CHAN_EST(sp)) {
    479  10448    Thomas 		sess_channel_t	*bcp = sp->sn_back;
    480   6741  th199096 		sess_bcsd_t	*bsdp;
    481   6741  th199096 
    482   6741  th199096 		rw_enter(&bcp->cn_lock, RW_READER);
    483   6741  th199096 		if ((bsdp = CTOBSD(bcp)) == NULL)
    484   6741  th199096 			cmn_err(CE_PANIC, "mds_session_inval: BCSD Not Set");
    485   6741  th199096 
    486   7813      rick 		rw_enter(&bsdp->bsd_rwlock, RW_READER);
    487   7397      rick 		status = bsdp->bsd_stat = slot_cb_status(bsdp->bsd_stok);
    488   7813      rick 		rw_exit(&bsdp->bsd_rwlock);
    489   6741  th199096 
    490   6741  th199096 		rw_exit(&bcp->cn_lock);
    491   6741  th199096 	} else {
    492   6741  th199096 		cmn_err(CE_NOTE, "No back chan established");
    493   7397      rick 		status = NFS4_OK;
    494   6741  th199096 	}
    495   7813      rick 
    496   7813      rick 	/* only invalidate sess if no bc traffic */
    497   7813      rick 	if (status == NFS4_OK)
    498  10448    Thomas 		rfs4_dbe_invalidate(sp->sn_dbe);
    499   7813      rick 
    500   7397      rick 	return (status);
    501   6741  th199096 }
    502   6741  th199096 
    503   6741  th199096 /*
    504   6741  th199096  * 1) Invalidate the session in the DB (so it can't be found anymore)
    505   6741  th199096  * 2) Verify that there's no outstanding CB traffic. If so, return err.
    506   6741  th199096  * 3) Eventually the session will be reaped by the reaper_thread
    507   6741  th199096  */
    508   6741  th199096 nfsstat4
    509  10448    Thomas mds_destroysession(mds_session_t *sp)
    510   6741  th199096 {
    511   7397      rick 	nfsstat4	cbs;
    512   6741  th199096 
    513  10448    Thomas 	rfs4_dbe_lock(sp->sn_dbe);
    514  10448    Thomas 	cbs = mds_session_inval(sp);
    515  10448    Thomas 	rfs4_dbe_unlock(sp->sn_dbe);
    516   6741  th199096 
    517   6741  th199096 	/*
    518   9214      rick 	 * The reference/hold maintained from the session to the client
    519   9214      rick 	 * struct gets nuked when the DB calls rfs4_dbe_destroy, which
    520   9214      rick 	 * in turn calls mds_session_destroy.
    521   6741  th199096 	 */
    522   7813      rick 	if (cbs == NFS4_OK)
    523  10448    Thomas 		rfs41_session_rele(sp);
    524   7813      rick 
    525   7397      rick 	return (cbs);
    526   6741  th199096 }
    527   6741  th199096 
    528   6741  th199096 sn_chan_dir_t
    529   6741  th199096 pd2cd(channel_dir_from_server4 dir)
    530   6741  th199096 {
    531   6741  th199096 	switch (dir) {
    532   6741  th199096 	case CDFS4_FORE:
    533   6741  th199096 		return (SN_CHAN_FORE);
    534   6741  th199096 
    535   6741  th199096 	case CDFS4_BACK:
    536   6741  th199096 		return (SN_CHAN_BACK);
    537   6741  th199096 
    538   6741  th199096 	case CDFS4_BOTH:
    539   6741  th199096 	default:
    540   6741  th199096 		return (SN_CHAN_BOTH);
    541   6741  th199096 	}
    542   6741  th199096 	/* NOTREACHED */
    543   7397      rick }
    544   7397      rick 
    545   7397      rick /*
    546   7397      rick  * Delegation CB race detection support
    547   7397      rick  */
    548   7397      rick void
    549   7397      rick rfs41_deleg_rs_hold(rfs4_deleg_state_t *dsp)
    550   7397      rick {
    551  10447    Thomas 	atomic_add_32(&dsp->rds_rs.refcnt, 1);
    552   7397      rick }
    553   7397      rick 
    554   7397      rick void
    555   7397      rick rfs41_deleg_rs_rele(rfs4_deleg_state_t *dsp)
    556   7397      rick {
    557  10447    Thomas 	ASSERT(dsp->rds_rs.refcnt > 0);
    558  10447    Thomas 	atomic_add_32(&dsp->rds_rs.refcnt, -1);
    559  10447    Thomas 	if (dsp->rds_rs.refcnt == 0) {
    560  10447    Thomas 		bzero(dsp->rds_rs.sessid, sizeof (sessionid4));
    561  10447    Thomas 		dsp->rds_rs.seqid = dsp->rds_rs.slotno = 0;
    562   7397      rick 	}
    563   7397      rick }
    564   7397      rick 
    565   7397      rick void
    566   7397      rick rfs41_seq4_hold(void *data, uint32_t flag)
    567   7397      rick {
    568   7397      rick 	bit_attr_t	*p = (bit_attr_t *)data;
    569   7397      rick 	uint32_t	 idx = log2(flag);
    570   7397      rick 
    571   7397      rick 	ASSERT(p[idx].ba_bit == flag);
    572   7397      rick 	atomic_add_32(&p[idx].ba_refcnt, 1);
    573   7397      rick 	p[idx].ba_trigger = gethrestime_sec();
    574   7397      rick }
    575   7397      rick 
    576   7397      rick void
    577   7397      rick rfs41_seq4_rele(void *data, uint32_t flag)
    578   7397      rick {
    579   7397      rick 	bit_attr_t	*p = (bit_attr_t *)data;
    580   7397      rick 	uint32_t	 idx = log2(flag);
    581   7397      rick 
    582   7397      rick 	ASSERT(p[idx].ba_bit == flag);
    583   7397      rick 	if (p[idx].ba_refcnt > 0)
    584   7397      rick 		atomic_add_32(&p[idx].ba_refcnt, -1);
    585   7397      rick 	p[idx].ba_trigger = gethrestime_sec();
    586   6741  th199096 }
    587   6741  th199096 
    588   6741  th199096 sess_channel_t *
    589   6741  th199096 rfs41_create_session_channel(channel_dir_from_server4 dir)
    590   6741  th199096 {
    591   6741  th199096 	sess_channel_t   *cp;
    592   6741  th199096 	sess_bcsd_t	 *bp;
    593   6741  th199096 
    594   6741  th199096 	cp = (sess_channel_t *)kmem_zalloc(sizeof (sess_channel_t), KM_SLEEP);
    595   6741  th199096 	rw_init(&cp->cn_lock, NULL, RW_DEFAULT, NULL);
    596   6741  th199096 
    597   6741  th199096 	switch (dir) {
    598   6741  th199096 	case CDFS4_FORE:
    599   6741  th199096 		break;
    600   6741  th199096 
    601   6741  th199096 	case CDFS4_BOTH:
    602   6741  th199096 	case CDFS4_BACK:
    603   6741  th199096 		/* BackChan Specific Data */
    604   6741  th199096 		bp = (sess_bcsd_t *)kmem_zalloc(sizeof (sess_bcsd_t), KM_SLEEP);
    605   7397      rick 		rw_init(&bp->bsd_rwlock, NULL, RW_DEFAULT, NULL);
    606   6741  th199096 		cp->cn_csd = (sess_bcsd_t *)bp;
    607   6741  th199096 		break;
    608   6741  th199096 	}
    609   6741  th199096 	return (cp);
    610   6741  th199096 }
    611   6741  th199096 
    612   6741  th199096 void
    613   7813      rick rfs41_destroy_session_channel(mds_session_t *sp, channel_dir_from_server4 dir)
    614   6741  th199096 {
    615   7813      rick 	sess_channel_t	*cp;
    616   6741  th199096 	sess_bcsd_t	*bp;
    617   6741  th199096 
    618   7813      rick 	if (sp == NULL)
    619   7813      rick 		return;
    620   7813      rick 	if (dir == CDFS4_FORE && sp->sn_fore == NULL)
    621   7813      rick 		return;
    622   7813      rick 	if (dir == CDFS4_BACK && sp->sn_back == NULL)
    623   6741  th199096 		return;
    624   6741  th199096 
    625   7813      rick 	if (sp->sn_bdrpc) {
    626   7813      rick 		ASSERT(sp->sn_fore == sp->sn_back);
    627   7813      rick 		sp->sn_fore = NULL;
    628   7813      rick 		goto back;
    629   7813      rick 	}
    630   6741  th199096 
    631   7813      rick 	if (dir == CDFS4_FORE || dir == CDFS4_BOTH) {
    632   7813      rick fore:
    633   7813      rick 		if (sp->sn_fore == NULL)
    634   7813      rick 			return;
    635   7813      rick 		cp = sp->sn_fore;
    636   7813      rick 
    637   7813      rick 		rw_destroy(&cp->cn_lock);
    638   7813      rick 		kmem_free(cp, sizeof (sess_channel_t));
    639   7813      rick 		sp->sn_fore = NULL;
    640   7813      rick 	}
    641   7813      rick 
    642   7813      rick 	if (dir == CDFS4_BACK || dir == CDFS4_BOTH) {
    643   7813      rick back:
    644   7813      rick 		if (sp->sn_back == NULL)
    645   7813      rick 			return;
    646   7813      rick 		cp = sp->sn_back;
    647   7813      rick 
    648   6741  th199096 		bp = (sess_bcsd_t *)cp->cn_csd;
    649   7397      rick 		rw_destroy(&bp->bsd_rwlock);
    650   6741  th199096 		kmem_free(bp, sizeof (sess_bcsd_t));
    651   7813      rick 
    652   7813      rick 		rw_destroy(&cp->cn_lock);
    653   7813      rick 		kmem_free(cp, sizeof (sess_channel_t));
    654   7813      rick 		sp->sn_back = NULL;
    655   6741  th199096 	}
    656   6741  th199096 }
    657   6741  th199096 
    658   6741  th199096 /*
    659   6741  th199096  * Create/Initialize the session for this rfs4_client_t. Also
    660   6741  th199096  * create its slot replay cache as per the server's resource
    661   6741  th199096  * constraints.
    662   6741  th199096  */
    663   6741  th199096 /* ARGSUSED */
    664   6741  th199096 static bool_t
    665  10016    Thomas mds_session_create(rfs4_entry_t u_entry, void *arg)
    666   6741  th199096 {
    667  10448    Thomas 	mds_session_t		*sp = (mds_session_t *)u_entry;
    668   6741  th199096 	session41_create_t	*ap = (session41_create_t *)arg;
    669   6741  th199096 	sess_channel_t		*ocp = NULL;
    670   6741  th199096 	sid			*sidp;
    671   6741  th199096 	SVCMASTERXPRT		*mxprt;
    672   6741  th199096 	uint32_t		 i;
    673   6741  th199096 	int			 bdrpc;
    674   6741  th199096 	rpcprog_t		 prog;
    675   6741  th199096 	channel_dir_from_server4 dir;
    676   6741  th199096 	sess_bcsd_t		*bsdp;
    677  10475      rick 	nfs_server_instance_t	*instp;
    678  10475      rick 	int			 max_slots;
    679  10475      rick 	nfsstat4		 sle;
    680  10475      rick 	struct svc_req		*req;
    681   6741  th199096 
    682  10448    Thomas 	ASSERT(sp != NULL);
    683  10448    Thomas 	if (sp == NULL)
    684   6741  th199096 		return (FALSE);
    685   7739   jwahlig 
    686  10448    Thomas 	instp = dbe_to_instp(sp->sn_dbe);
    687   6741  th199096 
    688   6741  th199096 	/*
    689   9214      rick 	 * Back pointer/ref to parent data struct (rfs4_client_t)
    690   6741  th199096 	 */
    691  10448    Thomas 	sp->sn_clnt = (rfs4_client_t *)ap->cs_client;
    692  10448    Thomas 	rfs4_dbe_hold(sp->sn_clnt->rc_dbe);
    693  10475      rick 	req = (struct svc_req *)ap->cs_req;
    694  10475      rick 	mxprt = (SVCMASTERXPRT *)req->rq_xprt->xp_master;
    695   6741  th199096 
    696   6741  th199096 	/*
    697   6741  th199096 	 * Handcrafting the session id
    698   6741  th199096 	 */
    699  10448    Thomas 	sidp = (sid *)&sp->sn_sessid;
    700   6741  th199096 	sidp->impl_id.pad0 = 0x00000000;
    701   6741  th199096 	sidp->impl_id.pad1 = 0xFFFFFFFF;
    702   7739   jwahlig 	sidp->impl_id.start_time = instp->start_time;
    703  10448    Thomas 	sidp->impl_id.s_id = (uint32_t)rfs4_dbe_getid(sp->sn_dbe);
    704   6741  th199096 
    705   6741  th199096 	/*
    706   6741  th199096 	 * Process csa_flags; note that CREATE_SESSION4_FLAG_CONN_BACK_CHAN
    707   6741  th199096 	 * is processed below since it affects direction and setup of the
    708   6741  th199096 	 * backchannel accordingly.
    709   6741  th199096 	 */
    710  10448    Thomas 	sp->sn_csflags = 0;
    711   6741  th199096 	if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_PERSIST)
    712   6741  th199096 		/* XXX - Worry about persistence later */
    713  10448    Thomas 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_PERSIST;
    714   6741  th199096 
    715   6741  th199096 	if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_RDMA)
    716   6741  th199096 		/* XXX - No RDMA for now */
    717  10448    Thomas 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_RDMA;
    718   6741  th199096 
    719   6741  th199096 	/*
    720   6741  th199096 	 * Initialize some overall sessions values
    721   6741  th199096 	 */
    722  10448    Thomas 	sp->sn_bc.progno = ap->cs_aotw.csa_cb_program;
    723  10448    Thomas 	sp->sn_laccess = gethrestime_sec();
    724  10448    Thomas 	sp->sn_flags = 0;
    725   6741  th199096 
    726   6741  th199096 	/*
    727   6741  th199096 	 * Check if client has specified that the FORE channel should
    728   6741  th199096 	 * also be used for call back traffic (ie. bidir RPC). If so,
    729   6741  th199096 	 * let's try to accomodate the request.
    730   6741  th199096 	 */
    731   6741  th199096 	DTRACE_PROBE1(csa__flags, uint32_t, ap->cs_aotw.csa_flags);
    732   6741  th199096 	bdrpc = ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_BACK_CHAN;
    733   6741  th199096 
    734   6741  th199096 	if (bdrpc) {
    735   6741  th199096 		SVCCB_ARGS cbargs;
    736  10448    Thomas 		prog = sp->sn_bc.progno;
    737   6741  th199096 		cbargs.xprt = mxprt;
    738   6741  th199096 		cbargs.prog = prog;
    739   6741  th199096 		cbargs.vers = NFS_CB;
    740   6741  th199096 		cbargs.family = AF_INET;
    741  10448    Thomas 		cbargs.tag = (void *)sp->sn_sessid;
    742   6741  th199096 
    743  10475      rick 		if (SVC_CTL(req->rq_xprt, SVCCTL_SET_CBCONN, (void *)&cbargs)) {
    744   6741  th199096 			/*
    745   6741  th199096 			 * Couldn't create a bi-dir RPC connection. Reset
    746   6741  th199096 			 * bdrpc so that the session's channel flags are
    747   6741  th199096 			 * set appropriately and the client knows it needs
    748   6741  th199096 			 * to do the BIND_CONN_TO_SESSION dance in order
    749   6741  th199096 			 * to establish a callback path.
    750   6741  th199096 			 */
    751   6741  th199096 			bdrpc = 0;
    752   6741  th199096 		}
    753   6741  th199096 	}
    754   6741  th199096 
    755   6741  th199096 	/*
    756   6741  th199096 	 * Session's channel flags depending on bdrpc
    757   6741  th199096 	 */
    758  10448    Thomas 	sp->sn_bdrpc = bdrpc;
    759  10448    Thomas 	dir = sp->sn_bdrpc ? (CDFS4_FORE | CDFS4_BACK) : CDFS4_FORE;
    760   6741  th199096 	ocp = rfs41_create_session_channel(dir);
    761   6741  th199096 	ocp->cn_dir = dir;
    762  10448    Thomas 	sp->sn_fore = ocp;
    763   6741  th199096 
    764   6741  th199096 	/*
    765  10475      rick 	 * Check if channel attrs will be flexible enough for future
    766  10475      rick 	 * purposes. Channel attribute enforcement is done as part of
    767  10475      rick 	 * COMPOUND processing.
    768   6741  th199096 	 */
    769   6741  th199096 	ocp->cn_attrs = ap->cs_aotw.csa_fore_chan_attrs;
    770  10475      rick 	if (sle = sess_chan_limits(ocp)) {
    771  10475      rick 		ap->cs_error = sle;
    772  10475      rick 		return (FALSE);
    773  10475      rick 	}
    774   6741  th199096 
    775   6741  th199096 	/*
    776   6741  th199096 	 * No need for locks/synchronization at this time,
    777   6741  th199096 	 * since we're barely creating the session.
    778   6741  th199096 	 */
    779  10448    Thomas 	if (sp->sn_bdrpc) {
    780   6741  th199096 		/*
    781   6741  th199096 		 * bcsd got built as part of the channel's construction.
    782   6741  th199096 		 */
    783   6741  th199096 		if ((bsdp = CTOBSD(ocp)) == NULL) {
    784   6741  th199096 			cmn_err(CE_PANIC, "Back Chan Spec Data Not Set\t"
    785   6741  th199096 			    "<Internal Inconsistency>");
    786   6741  th199096 		}
    787  10472      rick 		bc_slot_tab = ap->cs_aotw.csa_back_chan_attrs.ca_maxrequests;
    788  10467      rick 		slrc_table_create(&bsdp->bsd_stok, bc_slot_tab);
    789  10448    Thomas 		sp->sn_csflags |= CREATE_SESSION4_FLAG_CONN_BACK_CHAN;
    790  10448    Thomas 		sp->sn_back = ocp;
    791   6741  th199096 
    792   6741  th199096 	} else {
    793   6741  th199096 		/*
    794   6741  th199096 		 * If not doing bdrpc, then we expect the client to perform
    795   6741  th199096 		 * an explicit BIND_CONN_TO_SESSION if it wants callback
    796   6741  th199096 		 * traffic. Subsequently, the cb channel should be set up
    797   9394         P 		 * at that point along with its corresponding slot (see
    798   7397      rick 		 * rfs41_bc_setup).
    799   6741  th199096 		 */
    800  10448    Thomas 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_BACK_CHAN;
    801  10448    Thomas 		sp->sn_back = NULL;
    802   6741  th199096 		prog = 0;
    803   7397      rick 
    804   7397      rick 		/*
    805   7397      rick 		 * XXX 08/15/2008 (rick) - if the channel is not bidir when
    806   7397      rick 		 *	created in CREATE_SESSION, then we should save off
    807   7397      rick 		 *	the ap->cs_aotw.csa_back_chan_attrs in case later
    808   7397      rick 		 *	a bc2s is called to create the back channel.
    809   7397      rick 		 */
    810   6741  th199096 	}
    811   6741  th199096 
    812   6741  th199096 	/*
    813   6741  th199096 	 * We're just creating the session... there _shouldn't_ be any
    814   6741  th199096 	 * other threads wanting to add connections to this sessions'
    815   6741  th199096 	 * conn list, so we purposefully do _not_ take the ocp->cn_lock
    816   6741  th199096 	 *
    817   6741  th199096 	 * sn_bc fields are all initialized to 0 (via zalloc)
    818   6741  th199096 	 */
    819   6741  th199096 
    820  10475      rick 	SVC_CTL(req->rq_xprt, SVCCTL_SET_TAG, (void *)sp->sn_sessid);
    821  10448    Thomas 
    822  10448    Thomas 	if (sp->sn_bdrpc) {
    823  10448    Thomas 		atomic_add_32(&sp->sn_bc.pngcnt, 1);
    824   6741  th199096 	}
    825   6741  th199096 
    826   6741  th199096 	/*
    827   6741  th199096 	 * Now we allocate space for the slrc, initializing each slot's
    828   6741  th199096 	 * sequenceid and slotid to zero and a (pre)cached result of
    829   6741  th199096 	 * NFS4ERR_SEQ_MISORDERED. Note that we zero out the entries
    830   6741  th199096 	 * by virtue of the z-alloc.
    831   6741  th199096 	 */
    832   9394         P 	max_slots = ocp->cn_attrs.ca_maxrequests;
    833  10448    Thomas 	slrc_table_create(&sp->sn_replay, max_slots);
    834   7397      rick 
    835   7397      rick 	/* only initialize bits relevant to session scope */
    836  10448    Thomas 	bzero(&sp->sn_seq4, sizeof (bit_attr_t) * BITS_PER_WORD);
    837   7397      rick 	for (i = 1; i <= SEQ4_HIGH_BIT && i != 0; i <<= 1) {
    838   7397      rick 		uint32_t idx = log2(i);
    839   7397      rick 
    840   7397      rick 		switch (i) {
    841   7397      rick 		case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING:
    842   7397      rick 		case SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED:
    843   7397      rick 		case SEQ4_STATUS_CB_PATH_DOWN_SESSION:
    844   7397      rick 		case SEQ4_STATUS_BACKCHANNEL_FAULT:
    845  10448    Thomas 			sp->sn_seq4[idx].ba_bit = i;
    846   7397      rick 			break;
    847   7397      rick 		default:
    848   7397      rick 			/* already bzero'ed */
    849   7397      rick 			break;
    850   7397      rick 		}
    851   7397      rick 	}
    852   7397      rick 
    853  10448    Thomas 	if (sp->sn_bdrpc) {
    854   7397      rick 		/*
    855   7397      rick 		 * Recall that for CB_PATH_DOWN[_SESSION], the refcnt
    856   7397      rick 		 * indicates the number of active back channel conns
    857   7397      rick 		 */
    858  10448    Thomas 		rfs41_seq4_hold(&sp->sn_seq4, SEQ4_STATUS_CB_PATH_DOWN_SESSION);
    859  10448    Thomas 		rfs41_seq4_hold(&sp->sn_clnt->rc_seq4,
    860  10447    Thomas 		    SEQ4_STATUS_CB_PATH_DOWN);
    861   6741  th199096 	}
    862   6741  th199096 	return (TRUE);
    863   6741  th199096 }
    864   6741  th199096 
    865   6741  th199096 /* ARGSUSED */
    866   6741  th199096 static void
    867   6741  th199096 mds_session_destroy(rfs4_entry_t u_entry)
    868   6741  th199096 {
    869  10448    Thomas 	mds_session_t	*sp = (mds_session_t *)u_entry;
    870   7397      rick 	sess_bcsd_t	*bsdp;
    871   7397      rick 
    872  10448    Thomas 	if (SN_CB_CHAN_EST(sp) && ((bsdp = CTOBSD(sp->sn_back)) != NULL))
    873   9394         P 		slrc_table_destroy(bsdp->bsd_stok);
    874   6741  th199096 
    875   6741  th199096 	/*
    876   6741  th199096 	 * XXX - A session can have multiple BC clnt handles that need
    877   6741  th199096 	 *	 to be discarded. mds_session_inval calls CLNT_DESTROY
    878   6741  th199096 	 *	 which will remove the CB client handle from the global
    879   6741  th199096 	 *	 list (cb_clnt_list) now. This will have to change once
    880   6741  th199096 	 *	 we manage the BC clnt handles per session.
    881   6741  th199096 	 */
    882   6741  th199096 
    883   6741  th199096 	/*
    884   7813      rick 	 * Remove the fore and back channels.
    885   6741  th199096 	 */
    886  10448    Thomas 	rfs41_destroy_session_channel(sp, CDFS4_BOTH);
    887   6741  th199096 
    888   6741  th199096 	/*
    889   6741  th199096 	 * Nuke slot replay cache for this session
    890   6741  th199096 	 */
    891  10448    Thomas 	if (sp->sn_replay) {
    892  10448    Thomas 		slrc_table_destroy(sp->sn_replay);
    893  10448    Thomas 		sp->sn_replay = NULL;
    894   7813      rick 	}
    895   9214      rick 
    896   9214      rick 	/*
    897   9214      rick 	 * Remove reference to parent data struct
    898   9214      rick 	 */
    899  10448    Thomas 	if (sp->sn_clnt)
    900  10448    Thomas 		rfs4_client_rele(sp->sn_clnt);
    901   6741  th199096 }
    902   6741  th199096 
    903   6741  th199096 static bool_t
    904   6741  th199096 mds_session_expiry(rfs4_entry_t u_entry)
    905   6741  th199096 {
    906  10448    Thomas 	mds_session_t	*sp = (mds_session_t *)u_entry;
    907  10448    Thomas 
    908  10448    Thomas 	if (sp == NULL || rfs4_dbe_is_invalid(sp->sn_dbe))
    909   9214      rick 		return (TRUE);
    910   9214      rick 
    911  10448    Thomas 	if (rfs4_lease_expired(sp->sn_clnt))
    912   6741  th199096 		return (TRUE);
    913   6741  th199096 
    914   6741  th199096 	return (FALSE);
    915   6741  th199096 }
    916   6741  th199096 
    917   7739   jwahlig void
    918   7739   jwahlig mds_kill_session_callout(rfs4_entry_t u_entry, void *arg)
    919   7739   jwahlig {
    920   7739   jwahlig 	rfs4_client_t *cp = (rfs4_client_t *)arg;
    921  10448    Thomas 	mds_session_t *sp = (mds_session_t *)u_entry;
    922  10448    Thomas 
    923  10448    Thomas 	if (sp->sn_clnt == cp && !(rfs4_dbe_is_invalid(sp->sn_dbe))) {
    924   7813      rick 		/*
    925   7813      rick 		 * client is going away; so no need to check for
    926   7813      rick 		 * CB channel traffic before destroying a session.
    927   7813      rick 		 */
    928  10448    Thomas 		rfs4_dbe_invalidate(sp->sn_dbe);
    929   9404    Thomas 	}
    930   7739   jwahlig }
    931   7739   jwahlig 
    932   7739   jwahlig void
    933   7739   jwahlig mds_clean_up_sessions(rfs4_client_t *cp)
    934   7739   jwahlig {
    935   7739   jwahlig 	nfs_server_instance_t *instp;
    936   7739   jwahlig 
    937  10447    Thomas 	instp = dbe_to_instp(cp->rc_dbe);
    938   7739   jwahlig 
    939   7739   jwahlig 	if (instp->mds_session_tab != NULL)
    940   7739   jwahlig 		rfs4_dbe_walk(instp->mds_session_tab,
    941   7739   jwahlig 		    mds_kill_session_callout, cp);
    942   7739   jwahlig }
    943   7739   jwahlig 
    944   6741  th199096 /*
    945   6741  th199096  * -----------------------------------------------
    946   6741  th199096  * MDS: Layout tables.
    947   6741  th199096  * -----------------------------------------------
    948   6741  th199096  */
    949   6741  th199096 static uint32_t
    950   6741  th199096 mds_layout_hash(void *key)
    951   6741  th199096 {
    952  10016    Thomas 	layout_core_t	*lc = (layout_core_t *)key;
    953  10016    Thomas 	int		i;
    954  10016    Thomas 	uint32_t	hash = 0;
    955  10016    Thomas 
    956  10016    Thomas 	if (lc->lc_stripe_count == 0)
    957  10016    Thomas 		return (0);
    958  10016    Thomas 
    959  10016    Thomas 	/*
    960  10016    Thomas 	 * Hash the first mds_sid
    961  10016    Thomas 	 */
    962  10016    Thomas 	for (i = 0; i < lc->lc_mds_sids[0].len; i++) {
    963  10016    Thomas 		hash <<= 1;
    964  10016    Thomas 		hash += (uint_t)lc->lc_mds_sids[0].val[i];
    965  10016    Thomas 	}
    966  10016    Thomas 
    967  10016    Thomas 	return (hash);
    968   6741  th199096 }
    969   6741  th199096 
    970   6741  th199096 static bool_t
    971   6741  th199096 mds_layout_compare(rfs4_entry_t entry, void *key)
    972   6741  th199096 {
    973  10016    Thomas 	mds_layout_t	*lp = (mds_layout_t *)entry;
    974  10016    Thomas 	layout_core_t	*lc = (layout_core_t *)key;
    975  10016    Thomas 
    976  10016    Thomas 	int		i;
    977  10016    Thomas 
    978  10016    Thomas 	if (lc->lc_stripe_unit == lp->mlo_lc.lc_stripe_unit) {
    979  10016    Thomas 		if (lc->lc_stripe_count ==
    980  10016    Thomas 		    lp->mlo_lc.lc_stripe_count) {
    981  10016    Thomas 			for (i = 0; i < lc->lc_stripe_count; i++) {
    982  10016    Thomas 				if (lc->lc_mds_sids[i].len !=
    983  10016    Thomas 				    lp->mlo_lc.lc_mds_sids[i].len) {
    984  10016    Thomas 					return (0);
    985  10016    Thomas 				}
    986  10016    Thomas 
    987  10016    Thomas 				if (bcmp(lc->lc_mds_sids[i].val,
    988  10016    Thomas 				    lp->mlo_lc.lc_mds_sids[i].val,
    989  10016    Thomas 				    lc->lc_mds_sids[i].len)) {
    990  10016    Thomas 					return (0);
    991  10016    Thomas 				}
    992  10016    Thomas 			}
    993  10016    Thomas 
    994  10016    Thomas 			/*
    995  10016    Thomas 			 * Everything matches!
    996  10016    Thomas 			 */
    997  10016    Thomas 			return (1);
    998  10016    Thomas 		}
    999  10016    Thomas 	}
   1000  10016    Thomas 
   1001  10016    Thomas 	return (0);
   1002  10016    Thomas }
   1003  10016    Thomas 
   1004  10016    Thomas static void *
   1005  10016    Thomas mds_layout_mkkey(rfs4_entry_t entry)
   1006  10016    Thomas {
   1007   6741  th199096 	mds_layout_t *lp = (mds_layout_t *)entry;
   1008   6741  th199096 
   1009  10016    Thomas 	return ((void *)&lp->mlo_lc);
   1010  10016    Thomas }
   1011  10016    Thomas 
   1012  10016    Thomas static uint32_t
   1013  10016    Thomas mds_layout_id_hash(void *key)
   1014  10016    Thomas {
   1015  10016    Thomas 	return ((uint32_t)(uintptr_t)key);
   1016  10016    Thomas }
   1017  10016    Thomas 
   1018  10016    Thomas static bool_t
   1019  10016    Thomas mds_layout_id_compare(rfs4_entry_t entry, void *key)
   1020   6741  th199096 {
   1021   6741  th199096 	mds_layout_t *lp = (mds_layout_t *)entry;
   1022   6741  th199096 
   1023  10016    Thomas 	return (lp->mlo_id == (int)(uintptr_t)key);
   1024  10016    Thomas }
   1025  10016    Thomas 
   1026  10016    Thomas static void *
   1027  10016    Thomas mds_layout_id_mkkey(rfs4_entry_t entry)
   1028  10016    Thomas {
   1029  10016    Thomas 	mds_layout_t *lp = (mds_layout_t *)entry;
   1030  10016    Thomas 
   1031  10016    Thomas 	return ((void *)(uintptr_t)lp->mlo_id);
   1032  10016    Thomas }
   1033  10016    Thomas 
   1034  10016    Thomas typedef struct {
   1035  10016    Thomas 	uint32_t			id;
   1036  10016    Thomas 	nfsv4_1_file_layout_ds_addr4	*ds_addr4;
   1037  10016    Thomas } mds_addmpd_t;
   1038  10016    Thomas 
   1039  10016    Thomas /*
   1040  10016    Thomas  * ================================================================
   1041  10016    Thomas  *	XXX: Both mds_gather_mds_sids and mds_gen_default_layout
   1042  10016    Thomas  *	have been left in to support installations with no
   1043  10016    Thomas  *	policies defined. In short, we do not force people to
   1044  10016    Thomas  *	set up a policy system. Whenever the SMF portion of the
   1045  10016    Thomas  *	code comes along, we will nuke these functions and
   1046  10016    Thomas  *	force a real default to exist.
   1047  10016    Thomas  *  ================================================================
   1048  10016    Thomas  */
   1049   6741  th199096 
   1050   6741  th199096 struct mds_gather_args {
   1051  10016    Thomas 	layout_core_t	lc;
   1052  10016    Thomas 	int 		found;
   1053   6741  th199096 };
   1054   6741  th199096 
   1055  10016    Thomas static void
   1056  10016    Thomas mds_gather_mds_sids(rfs4_entry_t entry, void *arg)
   1057  10016    Thomas {
   1058  10016    Thomas 	ds_guid_info_t		*pgi = (ds_guid_info_t *)entry;
   1059  10016    Thomas 	struct mds_gather_args	*gap = (struct mds_gather_args *)arg;
   1060   6741  th199096 
   1061   9407    Thomas 	int i, j;
   1062   9407    Thomas 
   1063  10016    Thomas 	if (rfs4_dbe_skip_or_invalid(pgi->dbe))
   1064  10016    Thomas 		return;
   1065  10016    Thomas 
   1066  10016    Thomas 	if (gap->found < gap->lc.lc_stripe_count) {
   1067   9407    Thomas 		/*
   1068   9407    Thomas 		 * Insert in order.
   1069   9407    Thomas 		 */
   1070  10016    Thomas 		for (i = 0; i < gap->found; i++) {
   1071  10016    Thomas 			if ((pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len <
   1072  10016    Thomas 			    gap->lc.lc_mds_sids[i].len) ||
   1073  10016    Thomas 			    (pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len ==
   1074  10016    Thomas 			    gap->lc.lc_mds_sids[i].len &&
   1075  10016    Thomas 			    bcmp(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val,
   1076  10016    Thomas 			    gap->lc.lc_mds_sids[i].val,
   1077  10016    Thomas 			    gap->lc.lc_mds_sids[i].len) < 0)) {
   1078  10016    Thomas 				for (j = gap->found; j > i; j--) {
   1079  10016    Thomas 					gap->lc.lc_mds_sids[j].len =
   1080  10016    Thomas 					    gap->lc.lc_mds_sids[j - 1].len;
   1081  10016    Thomas 					gap->lc.lc_mds_sids[j - 1].val =
   1082  10016    Thomas 					    gap->lc.lc_mds_sids[j].val;
   1083  10016    Thomas 				}
   1084   9407    Thomas 
   1085   9407    Thomas 				break;
   1086   9407    Thomas 			}
   1087   9407    Thomas 		}
   1088   9407    Thomas 
   1089   9407    Thomas 		/*
   1090  10016    Thomas 		 * Either we found it and i is where it goes or we didn't
   1091  10016    Thomas 		 * find it and i is the tail. Either way, same thing happens!
   1092  10016    Thomas 		 */
   1093  10016    Thomas 		gap->lc.lc_mds_sids[i].len =
   1094  10016    Thomas 		    pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_len;
   1095  10016    Thomas 		gap->lc.lc_mds_sids[i].val =
   1096  10016    Thomas 		    kmem_alloc(gap->lc.lc_mds_sids[i].len, KM_SLEEP);
   1097  10016    Thomas 		bcopy(pgi->ds_guid.ds_guid_u.zfsguid.zfsguid_val,
   1098  10016    Thomas 		    gap->lc.lc_mds_sids[i].val,
   1099  10016    Thomas 		    gap->lc.lc_mds_sids[i].len);
   1100  10016    Thomas 
   1101  10016    Thomas 		gap->found++;
   1102  10016    Thomas 	}
   1103  10016    Thomas }
   1104  10016    Thomas 
   1105  10016    Thomas int mds_default_stripe = 32;
   1106  10016    Thomas 
   1107  10016    Thomas mds_layout_t *
   1108  10016    Thomas mds_gen_default_layout(nfs_server_instance_t *instp)
   1109  10016    Thomas {
   1110  10016    Thomas 	struct mds_gather_args	gap;
   1111  10016    Thomas 	mds_layout_t		*lp;
   1112  10016    Thomas 
   1113  10016    Thomas 	int			i;
   1114  10016    Thomas 
   1115  10016    Thomas 	bzero(&gap, sizeof (gap));
   1116  10016    Thomas 
   1117  10016    Thomas 	gap.found = 0;
   1118  10016    Thomas 
   1119  10016    Thomas 	rw_enter(&instp->ds_guid_info_lock, RW_READER);
   1120  10016    Thomas 	gap.lc.lc_stripe_count = instp->ds_guid_info_count;
   1121  10016    Thomas 	rw_exit(&instp->ds_guid_info_lock);
   1122  10016    Thomas 
   1123  10016    Thomas 	gap.lc.lc_mds_sids = kmem_zalloc(gap.lc.lc_stripe_count *
   1124  10016    Thomas 	    sizeof (mds_sid), KM_SLEEP);
   1125  10016    Thomas 
   1126  10016    Thomas 	rw_enter(&instp->ds_guid_info_lock, RW_READER);
   1127  10016    Thomas 	rfs4_dbe_walk(instp->ds_guid_info_tab, mds_gather_mds_sids, &gap);
   1128  10016    Thomas 	rw_exit(&instp->ds_guid_info_lock);
   1129  10016    Thomas 
   1130  10016    Thomas 	/*
   1131  10016    Thomas 	 * If we didn't find any devices then we do no service
   1132  10016    Thomas 	 */
   1133  10016    Thomas 	if (gap.found == 0) {
   1134  10016    Thomas 		kmem_free(gap.lc.lc_mds_sids, gap.lc.lc_stripe_count *
   1135  10016    Thomas 		    sizeof (mds_sid));
   1136  10016    Thomas 		return (NULL);
   1137  10016    Thomas 	}
   1138  10016    Thomas 
   1139  10016    Thomas 	/*
   1140  10016    Thomas 	 * XXX: What if found != stripe_count ?
   1141  10016    Thomas 	 */
   1142  10016    Thomas 
   1143  10016    Thomas 	gap.lc.lc_stripe_unit = mds_default_stripe * 1024;
   1144  10016    Thomas 
   1145  10016    Thomas 	rw_enter(&instp->mds_layout_lock, RW_WRITER);
   1146  10016    Thomas 	lp = (mds_layout_t *)rfs4_dbcreate(instp->mds_layout_idx,
   1147  10016    Thomas 	    (void *)&gap.lc);
   1148  10016    Thomas 	if (lp) {
   1149  10016    Thomas 		instp->mds_layout_default_idx = lp->mlo_id;
   1150  10016    Thomas 	}
   1151  10016    Thomas 	rw_exit(&instp->mds_layout_lock);
   1152  10016    Thomas 
   1153  10016    Thomas 	for (i = 0; i < gap.lc.lc_stripe_count; i++) {
   1154  10016    Thomas 		kmem_free(gap.lc.lc_mds_sids[i].val,
   1155  10016    Thomas 		    gap.lc.lc_mds_sids[i].len);
   1156  10016    Thomas 	}
   1157  10016    Thomas 
   1158  10016    Thomas 	kmem_free(gap.lc.lc_mds_sids, gap.lc.lc_stripe_count *
   1159  10016    Thomas 	    sizeof (mds_sid));
   1160  10016    Thomas 	return (lp);
   1161  10016    Thomas }
   1162  10016    Thomas 
   1163  10016    Thomas /* ================================================================ */
   1164  10016    Thomas 
   1165  10016    Thomas 
   1166  10016    Thomas /*
   1167  10016    Thomas  * Given a layout, which now is comprised of mds_dataset_ids, instead of
   1168  10016    Thomas  * devices, generate the list of devices...
   1169  10016    Thomas  */
   1170  10016    Thomas static mds_mpd_t *
   1171  10016    Thomas mds_gen_mpd(nfs_server_instance_t *instp, mds_layout_t *lp)
   1172  10016    Thomas {
   1173  10016    Thomas 	nfsv4_1_file_layout_ds_addr4	ds_dev;
   1174   6741  th199096 
   1175   9404    Thomas 	/*
   1176   9404    Thomas 	 * The key to understanding the way these data structures
   1177   9404    Thomas 	 * interact is that map points to ds_dev. And map is stuck
   1178   9404    Thomas 	 * into the mds_mpd_idx database.
   1179   9404    Thomas 	 */
   1180  10016    Thomas 	mds_addmpd_t	map = { .id = 0, .ds_addr4 = &ds_dev };
   1181  10016    Thomas 	mds_mpd_t	*mp = NULL;
   1182  10016    Thomas 	uint_t		len;
   1183  10016    Thomas 	int		 i, iLoaded = 0;
   1184  10016    Thomas 	uint32_t	*sivp;
   1185  10016    Thomas 	multipath_list4	*mplp;
   1186  10016    Thomas 
   1187  10016    Thomas 	ds_addrlist_t	**adp = NULL;
   1188  10016    Thomas 
   1189  10016    Thomas 	ASSERT(instp->mds_mpd_id_space != NULL);
   1190  10016    Thomas 	map.id = id_alloc(instp->mds_mpd_id_space);
   1191   6741  th199096 
   1192   6741  th199096 	/*
   1193   6741  th199096 	 * build a nfsv4_1_file_layout_ds_addr4, encode it and
   1194   6741  th199096 	 * cache it in state_store.
   1195   6741  th199096 	 */
   1196  10016    Thomas 	len = lp->mlo_lc.lc_stripe_count;
   1197   6741  th199096 
   1198   6741  th199096 	/* allocate space for the indices */
   1199   6741  th199096 	sivp = ds_dev.nflda_stripe_indices.nflda_stripe_indices_val =
   1200   6741  th199096 	    kmem_zalloc(len * sizeof (uint32_t), KM_SLEEP);
   1201   6741  th199096 
   1202   6741  th199096 	ds_dev.nflda_stripe_indices.nflda_stripe_indices_len = len;
   1203   6741  th199096 
   1204   6741  th199096 	/* populate the stripe indices */
   1205  10016    Thomas 	for (i = 0; i < len; i++)
   1206  10016    Thomas 		sivp[i] = i;
   1207   6741  th199096 
   1208   6741  th199096 	/*
   1209   6741  th199096 	 * allocate space for the multipath_list4 (for now we just
   1210   6741  th199096 	 * have the one path)
   1211   6741  th199096 	 */
   1212   6741  th199096 	mplp = ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_val =
   1213   6741  th199096 	    kmem_zalloc(len * sizeof (multipath_list4), KM_SLEEP);
   1214   6741  th199096 
   1215   6741  th199096 	ds_dev.nflda_multipath_ds_list.nflda_multipath_ds_list_len = len;
   1216   6741  th199096 
   1217  10016    Thomas 	adp = kmem_zalloc(len * sizeof (ds_addrlist_t *), KM_SLEEP);
   1218  10016    Thomas 
   1219   6741  th199096 	/*
   1220   7739   jwahlig 	 * Now populate the netaddrs using the stashed ds_addr
   1221   6741  th199096 	 * pointers
   1222   6741  th199096 	 */
   1223  10016    Thomas 	for (i = 0; i < len; i++) {
   1224  10016    Thomas 		ds_addrlist_t	*dp;
   1225  10016    Thomas 
   1226  10016    Thomas 		mplp[i].multipath_list4_len = 1;
   1227  10016    Thomas 		dp = mds_find_ds_addrlist_by_mds_sid(instp,
   1228  10016    Thomas 		    &lp->mlo_lc.lc_mds_sids[i]);
   1229  10016    Thomas 		if (!dp) {
   1230  10016    Thomas 			iLoaded = i;
   1231  10016    Thomas 			goto cleanup;
   1232  10016    Thomas 		}
   1233  10016    Thomas 
   1234  10016    Thomas 		mplp[i].multipath_list4_val = &dp->dev_addr;
   1235  10016    Thomas 		adp[i] = dp;
   1236  10016    Thomas 	}
   1237  10016    Thomas 
   1238  10016    Thomas 	iLoaded = len;
   1239   6741  th199096 
   1240   6741  th199096 	/*
   1241   6741  th199096 	 * Add the multipath_list4, this will encode and cache
   1242   6741  th199096 	 * the result.
   1243   6741  th199096 	 */
   1244   7739   jwahlig 	rw_enter(&instp->mds_mpd_lock, RW_WRITER);
   1245  10016    Thomas 
   1246  10016    Thomas 	/*
   1247  10016    Thomas 	 * XXX: Each layout has its own mpd.
   1248  10016    Thomas 	 *
   1249  10016    Thomas 	 * Note that we should fix this....
   1250  10016    Thomas 	 */
   1251   7739   jwahlig 	mp = (mds_mpd_t *)rfs4_dbcreate(instp->mds_mpd_idx, (void *)&map);
   1252  10016    Thomas 	if (mp) {
   1253  10016    Thomas 		lp->mlo_mpd_id = mp->mpd_id;
   1254  10016    Thomas 
   1255  10016    Thomas 		/*
   1256  10016    Thomas 		 * Put the layout on the layouts list.
   1257  10016    Thomas 		 * Note that we don't decrement the refcnt
   1258  10016    Thomas 		 * here, we keep a hold on it for inserting
   1259  10016    Thomas 		 * this layout on it.
   1260  10016    Thomas 		 */
   1261  10016    Thomas 		list_insert_tail(&mp->mpd_layouts_list, lp);
   1262  10016    Thomas 	}
   1263  10016    Thomas 
   1264   7739   jwahlig 	rw_exit(&instp->mds_mpd_lock);
   1265   6741  th199096 
   1266  10016    Thomas cleanup:
   1267  10016    Thomas 
   1268  10016    Thomas 	for (i = 0; i < iLoaded; i++) {
   1269  10016    Thomas 		rfs4_dbe_rele(adp[i]->dbe);
   1270  10016    Thomas 	}
   1271  10016    Thomas 
   1272  10016    Thomas 	kmem_free(adp, len * sizeof (ds_addrlist_t *));
   1273   6741  th199096 	kmem_free(mplp, len * sizeof (multipath_list4));
   1274   6741  th199096 	kmem_free(sivp, len * sizeof (uint32_t));
   1275  10016    Thomas 
   1276  10016    Thomas 	if (mp == NULL)
   1277  10016    Thomas 		id_free(instp->mds_mpd_id_space, map.id);
   1278  10016    Thomas 
   1279   6741  th199096 	return (mp);
   1280   6741  th199096 }
   1281   6741  th199096 
   1282  10016    Thomas void
   1283  10016    Thomas mds_nuke_layout(nfs_server_instance_t *instp, uint32_t mlo_id)
   1284   6741  th199096 {
   1285   6741  th199096 	bool_t create = FALSE;
   1286   6741  th199096 	rfs4_entry_t e;
   1287   6741  th199096 
   1288   7739   jwahlig 	rw_enter(&instp->mds_layout_lock, RW_WRITER);
   1289  10016    Thomas 	if ((e = rfs4_dbsearch(instp->mds_layout_ID_idx,
   1290  10016    Thomas 	    (void *)(uintptr_t)mlo_id,
   1291   7739   jwahlig 	    &create,
   1292   7739   jwahlig 	    NULL,
   1293   7739   jwahlig 	    RFS4_DBS_VALID)) != NULL) {
   1294   6741  th199096 		rfs4_dbe_invalidate(e->dbe);
   1295   9404    Thomas 		rfs4_dbe_rele(e->dbe);
   1296   6741  th199096 	}
   1297   7739   jwahlig 	rw_exit(&instp->mds_layout_lock);
   1298   6741  th199096 }
   1299   6741  th199096 
   1300   6741  th199096 /*ARGSUSED*/
   1301   6741  th199096 static bool_t
   1302   7739   jwahlig mds_layout_create(rfs4_entry_t u_entry, void *arg)
   1303   6741  th199096 {
   1304  10016    Thomas 	mds_layout_t	*lp = (mds_layout_t *)u_entry;
   1305  10016    Thomas 	layout_core_t	*lc = (layout_core_t *)arg;
   1306  10016    Thomas 
   1307  10016    Thomas 	nfs_server_instance_t *instp;
   1308  10016    Thomas 	int i;
   1309   9404    Thomas 	bool_t rc = TRUE;
   1310   6741  th199096 
   1311  10447    Thomas 	instp = dbe_to_instp(lp->mlo_dbe);
   1312  10447    Thomas 
   1313  10447    Thomas 	lp->mlo_id = rfs4_dbe_getid(lp->mlo_dbe);
   1314  10016    Thomas 
   1315  10016    Thomas 	lp->mlo_type = LAYOUT4_NFSV4_1_FILES;
   1316  10016    Thomas 	lp->mlo_lc.lc_stripe_unit = lc->lc_stripe_unit;
   1317  10016    Thomas 	lp->mlo_lc.lc_stripe_count = lc->lc_stripe_count;
   1318  10016    Thomas 
   1319  10016    Thomas 	lp->mlo_lc.lc_mds_sids = kmem_zalloc(lp->mlo_lc.lc_stripe_count *
   1320  10016    Thomas 	    sizeof (mds_sid), KM_SLEEP);
   1321  10016    Thomas 
   1322  10016    Thomas 	for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) {
   1323  10016    Thomas 		lp->mlo_lc.lc_mds_sids[i].len = lc->lc_mds_sids[i].len;
   1324  10016    Thomas 		lp->mlo_lc.lc_mds_sids[i].val =
   1325  10016    Thomas 		    kmem_alloc(lp->mlo_lc.lc_mds_sids[i].len, KM_SLEEP);
   1326  10016    Thomas 		bcopy(lc->lc_mds_sids[i].val, lp->mlo_lc.lc_mds_sids[i].val,
   1327  10016    Thomas 		    lp->mlo_lc.lc_mds_sids[i].len);
   1328  10016    Thomas 	}
   1329   6741  th199096 
   1330   6741  th199096 	/* Need to generate a device for this layout */
   1331  10016    Thomas 	lp->mlo_mpd = mds_gen_mpd(instp, lp);
   1332  10016    Thomas 	if (lp->mlo_mpd == NULL) {
   1333  10016    Thomas 		for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) {
   1334  10016    Thomas 			kmem_free(lp->mlo_lc.lc_mds_sids[i].val,
   1335  10016    Thomas 			    lp->mlo_lc.lc_mds_sids[i].len);
   1336  10016    Thomas 		}
   1337  10016    Thomas 
   1338  10016    Thomas 		kmem_free(lp->mlo_lc.lc_mds_sids, lp->mlo_lc.lc_stripe_count *
   1339  10016    Thomas 		    sizeof (mds_sid));
   1340  10016    Thomas 		lp->mlo_lc.lc_mds_sids = NULL;
   1341  10016    Thomas 		rc = FALSE;
   1342   9404    Thomas 	}
   1343   9404    Thomas 
   1344   9404    Thomas 	return (rc);
   1345   6741  th199096 }
   1346   6741  th199096 
   1347   6741  th199096 /*ARGSUSED*/
   1348   6741  th199096 static void
   1349   9404    Thomas mds_layout_destroy(rfs4_entry_t u_entry)
   1350   6741  th199096 {
   1351  10016    Thomas 	mds_layout_t		*lp = (mds_layout_t *)u_entry;
   1352  10016    Thomas 	nfs_server_instance_t	*instp;
   1353  10016    Thomas 	int			i;
   1354  10016    Thomas 
   1355  10016    Thomas 	instp = dbe_to_instp(u_entry->dbe);
   1356  10016    Thomas 
   1357  10016    Thomas 	rw_enter(&instp->mds_mpd_lock, RW_WRITER);
   1358  10016    Thomas 	if (lp->mlo_mpd != NULL) {
   1359  10016    Thomas 		list_remove(&lp->mlo_mpd->mpd_layouts_list, lp);
   1360  10447    Thomas 		rfs4_dbe_rele(lp->mlo_mpd->mpd_dbe);
   1361  10016    Thomas 		lp->mlo_mpd = NULL;
   1362  10016    Thomas 	}
   1363  10016    Thomas 	rw_exit(&instp->mds_mpd_lock);
   1364  10016    Thomas 
   1365  10016    Thomas 	if (lp->mlo_lc.lc_mds_sids != NULL) {
   1366  10016    Thomas 		for (i = 0; i < lp->mlo_lc.lc_stripe_count; i++) {
   1367  10016    Thomas 			kmem_free(lp->mlo_lc.lc_mds_sids[i].val,
   1368  10016    Thomas 			    lp->mlo_lc.lc_mds_sids[i].len);
   1369  10016    Thomas 		}
   1370  10016    Thomas 
   1371  10016    Thomas 		kmem_free(lp->mlo_lc.lc_mds_sids, lp->mlo_lc.lc_stripe_count *
   1372  10016    Thomas 		    sizeof (mds_sid));
   1373  10016    Thomas 		lp->mlo_lc.lc_mds_sids = NULL;
   1374  10016    Thomas 	}
   1375   6741  th199096 }
   1376   6741  th199096 
   1377   9215     James mds_layout_t *
   1378  10016    Thomas mds_add_layout(layout_core_t *lc)
   1379   6741  th199096 {
   1380   6741  th199096 	bool_t create = FALSE;
   1381  10016    Thomas 	mds_layout_t *lp;
   1382   6741  th199096 
   1383   7739   jwahlig 	rw_enter(&mds_server->mds_layout_lock, RW_WRITER);
   1384   6741  th199096 
   1385  10016    Thomas 	/*
   1386  10016    Thomas 	 * If it is already in memory, then we can just
   1387  10016    Thomas 	 * bump the refcnt.
   1388  10016    Thomas 	 */
   1389  10016    Thomas 	lp = (mds_layout_t *)rfs4_dbsearch(mds_server->mds_layout_idx,
   1390  10016    Thomas 	    (void *)lc, &create, NULL,
   1391  10016    Thomas 	    RFS4_DBS_VALID);
   1392  10016    Thomas 	if (lp != NULL) {
   1393  10016    Thomas 		rw_exit(&mds_server->mds_layout_lock);
   1394  10016    Thomas 		return (lp);
   1395  10016    Thomas 	}
   1396  10016    Thomas 
   1397  10016    Thomas 	lp = (mds_layout_t *)rfs4_dbcreate(mds_server->mds_layout_idx,
   1398  10016    Thomas 	    (void *)lc);
   1399   9215     James 	rw_exit(&mds_server->mds_layout_lock);
   1400  10016    Thomas 
   1401  10016    Thomas 	if (lp == NULL) {
   1402   6741  th199096 		printf("mds_add_layout: failed\n");
   1403   6741  th199096 		(void) set_errno(EFAULT);
   1404   6741  th199096 	}
   1405   6741  th199096 
   1406  10016    Thomas 	return (lp);
   1407   6741  th199096 }
   1408   6741  th199096 
   1409   6741  th199096 #define	ADDRHASH(key) ((unsigned long)(key) >> 3)
   1410   6741  th199096 
   1411   6741  th199096 /*
   1412   6741  th199096  * -----------------------------------------------
   1413   6741  th199096  * MDS: Layout Grant tables.
   1414   6741  th199096  * -----------------------------------------------
   1415   6741  th199096  *
   1416   6741  th199096  */
   1417   6741  th199096 static uint32_t
   1418   6741  th199096 mds_layout_grant_hash(void *key)
   1419   6741  th199096 {
   1420  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)key;
   1421  10447    Thomas 
   1422  10447    Thomas 	return (ADDRHASH(lg->lo_cp) ^ ADDRHASH(lg->lo_fp));
   1423   6741  th199096 }
   1424   6741  th199096 
   1425   6741  th199096 static bool_t
   1426   7739   jwahlig mds_layout_grant_compare(rfs4_entry_t u_entry, void *key)
   1427   6741  th199096 {
   1428  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)u_entry;
   1429  10447    Thomas 	mds_layout_grant_t *klg = (mds_layout_grant_t *)key;
   1430  10447    Thomas 
   1431  10447    Thomas 	return (lg->lo_cp == klg->lo_cp && lg->lo_fp == klg->lo_fp);
   1432   6741  th199096 }
   1433   6741  th199096 
   1434   6741  th199096 static void *
   1435   6741  th199096 mds_layout_grant_mkkey(rfs4_entry_t entry)
   1436   6741  th199096 {
   1437   6741  th199096 	return (entry);
   1438   6741  th199096 }
   1439   6741  th199096 
   1440   8981     James #ifdef NOT_USED_NOW
   1441   6741  th199096 static uint32_t
   1442   7739   jwahlig mds_layout_grant_id_hash(void *key)
   1443   6741  th199096 {
   1444   7739   jwahlig 	stateid_t *id = (stateid_t *)key;
   1445   7739   jwahlig 
   1446   7739   jwahlig 	return (id->v41_bits.state_ident);
   1447   6741  th199096 }
   1448   6741  th199096 
   1449   6741  th199096 static bool_t
   1450   7739   jwahlig mds_layout_grant_id_compare(rfs4_entry_t entry, void *key)
   1451   6741  th199096 {
   1452  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)entry;
   1453   7739   jwahlig 	stateid_t *id = (stateid_t *)key;
   1454   7739   jwahlig 	bool_t rc;
   1455   7739   jwahlig 
   1456   7739   jwahlig 	if (id->v41_bits.type != LAYOUTID)
   1457   7739   jwahlig 		return (FALSE);
   1458   7739   jwahlig 
   1459  10447    Thomas 	rc = (lg->lo_stateid.v41_bits.boottime == id->v41_bits.boottime &&
   1460  10447    Thomas 	    lg->lo_stateid.v41_bits.state_ident == id->v41_bits.state_ident);
   1461   7739   jwahlig 
   1462   7739   jwahlig 	return (rc);
   1463   6741  th199096 }
   1464   6741  th199096 
   1465   6741  th199096 static void *
   1466   7739   jwahlig mds_layout_grant_id_mkkey(rfs4_entry_t entry)
   1467   6741  th199096 {
   1468  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)entry;
   1469  10447    Thomas 
   1470  10447    Thomas 	return (&lg->lo_stateid);
   1471   6741  th199096 }
   1472   8981     James #endif
   1473   7739   jwahlig 
   1474   6741  th199096 /*ARGSUSED*/
   1475   6741  th199096 static bool_t
   1476   7739   jwahlig mds_layout_grant_create(rfs4_entry_t u_entry, void *arg)
   1477   6741  th199096 {
   1478  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)u_entry;
   1479  10447    Thomas 	rfs4_file_t *fp = ((mds_layout_grant_t *)arg)->lo_fp;
   1480  10447    Thomas 	rfs4_client_t *cp = ((mds_layout_grant_t *)arg)->lo_cp;
   1481  10447    Thomas 
   1482  10455    Thomas 	/*
   1483  10455    Thomas 	 * We hold onto the rfs4_file_t until we are done with it.
   1484  10455    Thomas 	 */
   1485  10447    Thomas 	rfs4_dbe_hold(fp->rf_dbe);
   1486  10447    Thomas 
   1487  10447    Thomas 	lg->lo_status = LO_GRANTED;
   1488  10447    Thomas 	lg->lo_stateid = mds_create_stateid(lg->lo_dbe, LAYOUTID);
   1489  10447    Thomas 	lg->lo_fp = fp;
   1490  10447    Thomas 	lg->lo_cp = cp;
   1491  10447    Thomas 	lg->lor_seqid = lg->lor_reply = 0;
   1492  10447    Thomas 	mutex_init(&lg->lo_lock, NULL, MUTEX_DEFAULT, NULL);
   1493   7739   jwahlig 
   1494   7739   jwahlig 	/* Init layout grant lists for remque/insque */
   1495  10447    Thomas 	lg->lo_grant_list.next = lg->lo_grant_list.prev =
   1496  10447    Thomas 	    &lg->lo_grant_list;
   1497  10447    Thomas 	lg->lo_grant_list.lg = lg;
   1498  10447    Thomas 
   1499  10447    Thomas 	lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev =
   1500  10447    Thomas 	    &lg->lo_clientgrantlist;
   1501  10447    Thomas 	lg->lo_clientgrantlist.lg = lg;
   1502  10447    Thomas 
   1503  10447    Thomas 	lg->lo_range = nfs_range_create();
   1504   8427     James 
   1505   6741  th199096 	return (TRUE);
   1506   6741  th199096 }
   1507   6741  th199096 
   1508   6741  th199096 /*ARGSUSED*/
   1509   6741  th199096 static void
   1510   8041      rick mds_layout_grant_destroy(rfs4_entry_t entry)
   1511   6741  th199096 {
   1512  10447    Thomas 	mds_layout_grant_t *lg = (mds_layout_grant_t *)entry;
   1513  10455    Thomas 
   1514  10455    Thomas 	/*
   1515  10455    Thomas 	 * The code which invalidated this node should have
   1516  10455    Thomas 	 * gone ahead and released the rfs4_file_t.
   1517  10455    Thomas 	 */
   1518  10461    Thomas 	ASSERT(lg->lo_fp == NULL);
   1519  10447    Thomas 
   1520  10447    Thomas 	mutex_destroy(&lg->lo_lock);
   1521  10447    Thomas 
   1522  10447    Thomas 	nfs_range_destroy(lg->lo_range);
   1523  10447    Thomas 	lg->lo_range = NULL;
   1524   6741  th199096 }
   1525   6741  th199096 
   1526   7739   jwahlig mds_layout_grant_t *
   1527   7739   jwahlig rfs41_findlogrant(struct compound_state *cs, rfs4_file_t *fp,
   1528   7739   jwahlig     rfs4_client_t *cp, bool_t *create)
   1529   7739   jwahlig {
   1530  10447    Thomas 	mds_layout_grant_t args, *lg;
   1531  10447    Thomas 
   1532  10447    Thomas 	args.lo_cp = cp;
   1533  10447    Thomas 	args.lo_fp = fp;
   1534  10447    Thomas 
   1535  10447    Thomas 	lg = (mds_layout_grant_t *)rfs4_dbsearch(
   1536  10447    Thomas 	    cs->instp->mds_layout_grant_idx, &args, create,
   1537  10447    Thomas 	    &args, RFS4_DBS_VALID);
   1538  10447    Thomas 
   1539  10447    Thomas 	return (lg);
   1540  10447    Thomas }
   1541  10447    Thomas 
   1542  10447    Thomas void
   1543  10447    Thomas rfs41_lo_grant_hold(mds_layout_grant_t *lg)
   1544  10447    Thomas {
   1545  10447    Thomas 	rfs4_dbe_hold(lg->lo_dbe);
   1546  10447    Thomas }
   1547  10447    Thomas 
   1548  10447    Thomas void
   1549  10447    Thomas rfs41_lo_grant_rele(mds_layout_grant_t *lg)
   1550  10447    Thomas {
   1551  10447    Thomas 	rfs4_dbe_rele(lg->lo_dbe);
   1552   7812     James }
   1553   7812     James 
   1554   7812     James /*
   1555   7812     James  * -----------------------------------------------
   1556   7812     James  * MDS: Ever Grant tables.
   1557   7812     James  * -----------------------------------------------
   1558   7812     James  *
   1559   7812     James  */
   1560   7812     James static uint32_t
   1561   7812     James mds_ever_grant_hash(void *key)
   1562   7812     James {
   1563  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)key;
   1564  10447    Thomas 
   1565  10447    Thomas 	return (ADDRHASH(eg->eg_cp) ^ ADDRHASH(eg->eg_key));
   1566   7812     James }
   1567   7812     James 
   1568   7812     James static bool_t
   1569   7812     James mds_ever_grant_compare(rfs4_entry_t u_entry, void *key)
   1570   7812     James {
   1571  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry;
   1572  10447    Thomas 	mds_ever_grant_t *keg = (mds_ever_grant_t *)key;
   1573  10447    Thomas 
   1574  10447    Thomas 	return (eg->eg_cp == keg->eg_cp &&
   1575  10447    Thomas 	    eg->eg_fsid.val[0] == keg->eg_fsid.val[0] &&
   1576  10447    Thomas 	    eg->eg_fsid.val[1] == keg->eg_fsid.val[1]);
   1577   7812     James }
   1578   7812     James 
   1579   7812     James static void *
   1580   7812     James mds_ever_grant_mkkey(rfs4_entry_t entry)
   1581   7812     James {
   1582   7812     James 	return (entry);
   1583   7812     James }
   1584   7812     James 
   1585   7812     James static bool_t
   1586   7812     James mds_ever_grant_fsid_compare(rfs4_entry_t entry, void *key)
   1587   7812     James {
   1588  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)entry;
   1589   7812     James 	int64_t g_key = (int64_t)(uintptr_t)key;
   1590   7812     James 
   1591  10447    Thomas 	return (eg->eg_key == g_key);
   1592   7812     James }
   1593   7812     James 
   1594   8981     James #ifdef NOT_USED_NOW
   1595   8981     James static uint32_t
   1596   8981     James mds_ever_grant_fsid_hash(void *key)
   1597   8981     James {
   1598   8981     James 	return ((uint32_t)(uintptr_t)key);
   1599   8981     James }
   1600   8981     James 
   1601   7812     James static void *
   1602   7812     James mds_ever_grant_fsid_mkkey(rfs4_entry_t entry)
   1603   7812     James {
   1604  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)entry;
   1605  10447    Thomas 
   1606  10447    Thomas 	return ((void*)(uintptr_t)eg->eg_key);
   1607   7812     James }
   1608   8981     James #endif
   1609   7812     James 
   1610   7812     James /*ARGSUSED*/
   1611   7812     James static bool_t
   1612   7812     James mds_ever_grant_create(rfs4_entry_t u_entry, void *arg)
   1613   7812     James {
   1614  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry;
   1615  10447    Thomas 	rfs4_client_t *cp = ((mds_ever_grant_t *)arg)->eg_cp;
   1616  10447    Thomas 
   1617  10447    Thomas 	eg->eg_cp = cp;
   1618  10447    Thomas 	eg->eg_fsid = ((mds_ever_grant_t *)arg)->eg_fsid;
   1619   7812     James 
   1620   7812     James 	return (TRUE);
   1621   7812     James }
   1622   7812     James 
   1623   7812     James /*ARGSUSED*/
   1624   7812     James static void
   1625   7812     James mds_ever_grant_destroy(rfs4_entry_t foo)
   1626   7812     James {
   1627   7812     James }
   1628   7812     James 
   1629   7812     James mds_ever_grant_t *
   1630   7812     James rfs41_findevergrant(rfs4_client_t *cp, vnode_t *vp, bool_t *create)
   1631   7812     James {
   1632   7812     James 	nfs_server_instance_t *instp;
   1633  10447    Thomas 	mds_ever_grant_t args, *eg;
   1634  10447    Thomas 
   1635  10447    Thomas 	instp = dbe_to_instp(cp->rc_dbe);
   1636  10447    Thomas 	args.eg_cp = cp;
   1637  10447    Thomas 	args.eg_fsid = vp->v_vfsp->vfs_fsid;
   1638  10447    Thomas 
   1639  10447    Thomas 	eg = (mds_ever_grant_t *)rfs4_dbsearch(
   1640  10447    Thomas 	    instp->mds_ever_grant_idx, &args, create, &args,
   1641  10447    Thomas 	    RFS4_DBS_VALID);
   1642  10447    Thomas 
   1643  10447    Thomas 	return (eg);
   1644  10447    Thomas }
   1645  10447    Thomas 
   1646  10447    Thomas void
   1647  10447    Thomas rfs41_ever_grant_rele(mds_ever_grant_t *eg)
   1648  10447    Thomas {
   1649  10447    Thomas 	rfs4_dbe_rele(eg->eg_dbe);
   1650   7812     James }
   1651   7812     James 
   1652   7812     James void
   1653   8439     James mds_kill_eg_callout(rfs4_entry_t u_entry, void *arg)
   1654   8439     James {
   1655  10447    Thomas 	mds_ever_grant_t *eg = (mds_ever_grant_t *)u_entry;
   1656   8439     James 	rfs4_client_t *cp = (rfs4_client_t *)arg;
   1657   8439     James 
   1658  10447    Thomas 	if (eg->eg_cp == cp) {
   1659  10447    Thomas 		eg->eg_cp = NULL;
   1660  10447    Thomas 		rfs4_dbe_invalidate(eg->eg_dbe);
   1661  10447    Thomas 		rfs4_dbe_rele_nolock(eg->eg_dbe);
   1662   8439     James 	}
   1663   8439     James }
   1664   8439     James 
   1665   8439     James void
   1666   7812     James mds_clean_up_grants(rfs4_client_t *cp)
   1667   7812     James {
   1668  10447    Thomas 	mds_layout_grant_t *lg;
   1669  10447    Thomas 	nfs_server_instance_t *instp;
   1670  10447    Thomas 
   1671  10447    Thomas 	rfs4_dbe_lock(cp->rc_dbe);
   1672  10447    Thomas 	while (cp->rc_clientgrantlist.next->lg != NULL) {
   1673  10447    Thomas 		lg = cp->rc_clientgrantlist.next->lg;
   1674  10447    Thomas 		remque(&lg->lo_clientgrantlist);
   1675  10447    Thomas 		lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev =
   1676  10447    Thomas 		    &lg->lo_clientgrantlist;
   1677  10447    Thomas 		lg->lo_cp = NULL;
   1678  10447    Thomas 
   1679  10447    Thomas 		rfs4_dbe_lock(lg->lo_fp->rf_dbe);
   1680  10447    Thomas 		remque(&lg->lo_grant_list);
   1681  10447    Thomas 		rfs4_dbe_unlock(lg->lo_fp->rf_dbe);
   1682  10447    Thomas 
   1683  10447    Thomas 		lg->lo_grant_list.next = lg->lo_grant_list.prev =
   1684  10447    Thomas 		    &lg->lo_grant_list;
   1685  10447    Thomas 		rfs4_file_rele(lg->lo_fp);
   1686  10447    Thomas 
   1687  10447    Thomas 		lg->lo_fp = NULL;
   1688  10447    Thomas 		rfs4_dbe_invalidate(lg->lo_dbe);
   1689  10447    Thomas 		rfs41_lo_grant_rele(lg);
   1690  10447    Thomas 	}
   1691  10447    Thomas 
   1692  10447    Thomas 	instp = dbe_to_instp(cp->rc_dbe);
   1693  10447    Thomas 	rfs4_dbe_unlock(cp->rc_dbe);
   1694   9215     James 
   1695   9215     James 	rw_enter(&instp->mds_ever_grant_lock, RW_READER);
   1696   8439     James 	rfs4_dbe_walk(instp->mds_ever_grant_tab, mds_kill_eg_callout, cp);
   1697   9215     James 	rw_exit(&instp->mds_ever_grant_lock);
   1698   9215     James }
   1699   9215     James 
   1700   9215     James struct grant_arg {
   1701   9215     James 	rfs4_client_t *cp;
   1702   9215     James 	vnode_t *vp;
   1703   9215     James };
   1704   9215     James 
   1705   9215     James void
   1706   9215     James mds_rm_grant_callout(rfs4_entry_t u_entry, void *arg)
   1707   9215     James {
   1708  10455    Thomas 	mds_layout_grant_t	*lg = (mds_layout_grant_t *)u_entry;
   1709  10455    Thomas 	struct grant_arg	*ga = (struct grant_arg *)arg;
   1710  10455    Thomas 	vnode_t			*vp;
   1711  10455    Thomas 
   1712  10455    Thomas 	if (rfs4_dbe_skip_or_invalid(lg->lo_dbe)) {
   1713  10455    Thomas 		ASSERT(lg->lo_fp == NULL);
   1714  10455    Thomas 		return;
   1715  10455    Thomas 	}
   1716  10455    Thomas 
   1717  10455    Thomas 	ASSERT(lg->lo_fp != NULL);
   1718  10455    Thomas 	vp = lg->lo_fp->rf_vp;
   1719  10447    Thomas 
   1720  10447    Thomas 	if (ga->cp == lg->lo_cp && vp && ga->vp->v_vfsp == vp->v_vfsp) {
   1721  10447    Thomas 		rfs4_dbe_lock(lg->lo_cp->rc_dbe);
   1722  10447    Thomas 		remque(&lg->lo_clientgrantlist);
   1723  10447    Thomas 		rfs4_dbe_unlock(lg->lo_cp->rc_dbe);
   1724  10447    Thomas 
   1725  10447    Thomas 		lg->lo_clientgrantlist.next = lg->lo_clientgrantlist.prev =
   1726  10447    Thomas 		    &lg->lo_clientgrantlist;
   1727  10447    Thomas 		lg->lo_cp = NULL;
   1728  10447    Thomas 
   1729  10447    Thomas 		rfs4_dbe_lock(lg->lo_fp->rf_dbe);
   1730  10447    Thomas 		remque(&lg->lo_grant_list);
   1731  10447    Thomas 		rfs4_dbe_unlock(lg->lo_fp->rf_dbe);
   1732  10447    Thomas 
   1733  10447    Thomas 		lg->lo_grant_list.next = lg->lo_grant_list.prev =
   1734  10447    Thomas 		    &lg->lo_grant_list;
   1735  10447    Thomas 		rfs4_file_rele(lg->lo_fp);
   1736  10447    Thomas 
   1737  10447    Thomas 		lg->lo_fp = NULL;
   1738  10447    Thomas 		rfs4_dbe_invalidate(lg->lo_dbe);
   1739  10447    Thomas 		rfs4_dbe_rele_nolock(lg->lo_dbe);
   1740   9215     James 	}
   1741   9215     James }
   1742   9215     James 
   1743   9215     James void
   1744   9215     James mds_clean_grants_by_fsid(rfs4_client_t *cp, vnode_t *vp)
   1745   9215     James {
   1746   9215     James 	struct grant_arg ga;
   1747   9215     James 	nfs_server_instance_t *instp;
   1748   9215     James 
   1749   9215     James 	ga.cp = cp;
   1750   9215     James 	ga.vp = vp;
   1751  10447    Thomas 	instp = dbe_to_instp(cp->rc_dbe);
   1752   9215     James 
   1753   9215     James 	rw_enter(&instp->mds_layout_grant_lock, RW_READER);
   1754   9215     James 	rfs4_dbe_walk(instp->mds_layout_grant_tab, mds_rm_grant_callout, &ga);
   1755   9215     James 	rw_exit(&instp->mds_layout_grant_lock);
   1756   7739   jwahlig }
   1757   7739   jwahlig 
   1758   8035     James /*
   1759   8035     James  * Conforms to Section 12.5.5.2.1.4 of draft-25
   1760   8035     James  */
   1761   8035     James void
   1762   8035     James rfs41_lo_seqid(stateid_t *sp)
   1763   8035     James {
   1764   8035     James 	if (sp == NULL)
   1765   8035     James 		return;
   1766   8035     James 
   1767   8035     James 	if ((sp->v41_bits.chgseq + 1) & (uint32_t)~0)
   1768   8035     James 		atomic_inc_32(&sp->v41_bits.chgseq);
   1769   8035     James 	else
   1770   8035     James 		(void) atomic_swap_32(&sp->v41_bits.chgseq, 1);
   1771   8035     James }
   1772   8035     James 
   1773   8041      rick bool_t
   1774  10447    Thomas rfs41_lo_still_granted(mds_layout_grant_t *lg)
   1775   8041      rick {
   1776   8441      rick 	bool_t	found = TRUE;
   1777   8041      rick 
   1778   8439     James 	/*
   1779   8439     James 	 * We currently have the layout grant, but is it still valid?
   1780   8439     James 	 * If it has been returned, then the status will be updated as
   1781   8439     James 	 * returned or recalled.  However, it is possible that the client
   1782   8439     James 	 * has gone away while we are still holding this.  When the client
   1783   8439     James 	 * is cleaned up, the pointer to the client and the file will be
   1784   8439     James 	 * set to NULL and it will have been removed from all lists, waiting
   1785   8439     James 	 * to be released and reaped.  In this case, the status may not
   1786   8439     James 	 * have been updated.
   1787   8439     James 	 */
   1788  10447    Thomas 	rfs4_dbe_lock(lg->lo_dbe);
   1789  10447    Thomas 	if (lg->lo_status == LO_RETURNED || lg->lo_status == LO_RECALLED ||
   1790  10447    Thomas 	    lg->lo_cp == NULL)
   1791   8439     James 		found = FALSE;
   1792  10447    Thomas 	rfs4_dbe_unlock(lg->lo_dbe);
   1793   8441      rick 
   1794   8041      rick 	return (found);
   1795   8041      rick }
   1796   8041      rick 
   1797   8041      rick static void
   1798  10447    Thomas rfs41_revoke_layout(mds_layout_grant_t *lg)
   1799   8041      rick {
   1800   8041      rick 	cmn_err(CE_NOTE, "rfs41_revoke_layout: layout revoked");
   1801  10447    Thomas 	rfs41_seq4_hold(&lg->lo_cp->rc_seq4,
   1802  10447    Thomas 	    SEQ4_STATUS_RECALLABLE_STATE_REVOKED);
   1803   8041      rick 
   1804   8041      rick 	/* XXX - rest of this function TBD */
   1805   8041      rick }
   1806   8041      rick 
   1807   6741  th199096 static void
   1808   6741  th199096 mds_do_lorecall(mds_lorec_t *lorec)
   1809   6741  th199096 {
   1810   7397      rick 	CB_COMPOUND4args	 cb4_args;
   1811   7397      rick 	CB_COMPOUND4res		 cb4_res;
   1812   7397      rick 	CB_SEQUENCE4args	*cbsap;
   1813   7397      rick 	CB_LAYOUTRECALL4args	*cblrap;
   1814   7739   jwahlig 	nfs_cb_argop4		*argops;
   1815   7739   jwahlig 	struct timeval		 timeout;
   1816   7739   jwahlig 	enum clnt_stat		 call_stat = RPC_FAILED;
   1817   7739   jwahlig 	int			 zilch = 0;
   1818   6741  th199096 	layoutrecall_file4	*lorf;
   1819   7397      rick 	CLIENT			*ch;
   1820   7397      rick 	int			 numops;
   1821   7397      rick 	int			 argsz;
   1822  10448    Thomas 	mds_session_t		*sp;
   1823   7739   jwahlig 	slot_ent_t		*p;
   1824  10447    Thomas 	mds_layout_grant_t	*lg;
   1825   8041      rick 	uint32_t		 sc = 0;
   1826   8041      rick 	int			 retried = 0;
   1827   6741  th199096 
   1828   6741  th199096 	DTRACE_PROBE1(nfssrv__i__sess_lorecall_fh, mds_lorec_t *, lorec);
   1829  10448    Thomas 	if ((sp = lorec->lor_sess) == NULL) {
   1830   8041      rick 		kmem_free(lorec, sizeof (mds_lorec_t));
   1831   8041      rick 		return;
   1832   6741  th199096 
   1833  10448    Thomas 	} else if (!SN_CB_CHAN_EST(sp)) {
   1834   8041      rick 		kmem_free(lorec, sizeof (mds_lorec_t));
   1835  10448    Thomas 		rfs41_session_rele(sp);
   1836   7397      rick 		return;
   1837   8041      rick 	}
   1838   7397      rick 
   1839   7397      rick 	/*
   1840   8041      rick 	 * Per-type pre-processing
   1841   7512      rick 	 */
   1842   8041      rick 	switch (lorec->lor_type) {
   1843   8041      rick 	case LAYOUTRECALL4_FILE:
   1844  10447    Thomas 		if (lorec->lor_lg == NULL)
   1845   8041      rick 			return;
   1846  10447    Thomas 		lg = lorec->lor_lg;
   1847  10447    Thomas 		break;
   1848  10447    Thomas 
   1849  10447    Thomas 	case LAYOUTRECALL4_FSID:
   1850  10448    Thomas 		sp->sn_clnt->rc_bulk_recall = LAYOUTRETURN4_FSID;
   1851   8041      rick 		break;
   1852   8041      rick 
   1853   8041      rick 	case LAYOUTRECALL4_ALL:
   1854  10448    Thomas 		sp->sn_clnt->rc_bulk_recall = LAYOUTRETURN4_ALL;
   1855   9215     James 		break;
   1856   8041      rick 	default:
   1857   8041      rick 		break;
   1858   8041      rick 	}
   1859   7512      rick 
   1860   7512      rick 	/*
   1861   7397      rick 	 * set up the compound args
   1862   7397      rick 	 */
   1863   7397      rick 	numops = 2;	/* CB_SEQUENCE + CB_LAYOUTRECALL */
   1864   7397      rick 	argsz = numops * sizeof (nfs_cb_argop4);
   1865   7397      rick 	argops = kmem_zalloc(argsz, KM_SLEEP);
   1866   6741  th199096 
   1867   6741  th199096 	argops[0].argop = OP_CB_SEQUENCE;
   1868   7397      rick 	cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence;
   1869   6741  th199096 
   1870   6741  th199096 	argops[1].argop = OP_CB_LAYOUTRECALL;
   1871   7397      rick 	cblrap = &argops[1].nfs_cb_argop4_u.opcblayoutrecall;
   1872   7397      rick 
   1873   7397      rick 	(void) str_to_utf8("cb_lo_recall", &cb4_args.tag);
   1874   7397      rick 	cb4_args.minorversion = CB4_MINOR_v1;
   1875   7397      rick 
   1876  10448    Thomas 	cb4_args.callback_ident = sp->sn_bc.progno;
   1877   7397      rick 	cb4_args.array_len = numops;
   1878   7397      rick 	cb4_args.array = argops;
   1879   6741  th199096 
   1880   6741  th199096 	cb4_res.tag.utf8string_val = NULL;
   1881   6741  th199096 	cb4_res.array = NULL;
   1882   6741  th199096 
   1883   6741  th199096 	/*
   1884   7397      rick 	 * CB_SEQUENCE
   1885   6741  th199096 	 */
   1886  10448    Thomas 	bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4));
   1887  10448    Thomas 	p = svc_slot_alloc(sp);
   1888   7397      rick 	mutex_enter(&p->se_lock);
   1889   7397      rick 	cbsap->csa_slotid = p->se_sltno;
   1890   7397      rick 	cbsap->csa_sequenceid = p->se_seqid;
   1891  10448    Thomas 	cbsap->csa_highest_slotid = svc_slot_maxslot(sp);
   1892   7397      rick 	cbsap->csa_cachethis = FALSE;
   1893   6741  th199096 
   1894   7397      rick 	/* no referring calling list for lo recall */
   1895   7397      rick 	cbsap->csa_rcall_llen = 0;
   1896   7397      rick 	cbsap->csa_rcall_lval = NULL;
   1897   7397      rick 	mutex_exit(&p->se_lock);
   1898   6741  th199096 
   1899   6741  th199096 	/*
   1900   7397      rick 	 * CB_LAYOUTRECALL
   1901   7397      rick 	 *
   1902   6741  th199096 	 * clora_change:
   1903   7397      rick 	 *	1: server prefers that client write modified data through
   1904   7397      rick 	 *	   MDS when pushing modified data due to layout recall
   1905   6741  th199096 	 *	0: server has no DS/MDS preference
   1906   6741  th199096 	 */
   1907   7397      rick 	cblrap->clora_type = LAYOUT4_NFSV4_1_FILES;
   1908   7397      rick 	cblrap->clora_iomode = LAYOUTIOMODE4_ANY;
   1909   7397      rick 	cblrap->clora_changed = 0;
   1910   7397      rick 	cblrap->clora_recall.lor_recalltype = lorec->lor_type;
   1911   6741  th199096 
   1912   6741  th199096 	switch (lorec->lor_type) {
   1913   6741  th199096 	case LAYOUTRECALL4_FILE:
   1914   7397      rick 		lorf = &cblrap->clora_recall.layoutrecall4_u.lor_layout;
   1915   6741  th199096 		lorf->lor_offset = 0;
   1916   6741  th199096 		lorf->lor_length = ONES_64;
   1917   6741  th199096 		lorf->lor_fh.nfs_fh4_len = lorec->lor_fh.fh_len;
   1918   6741  th199096 		lorf->lor_fh.nfs_fh4_val = (char *)&lorec->lor_fh.fh_buf;
   1919   8041      rick 		bcopy(&lorec->lor_stid, &lorf->lor_stateid, sizeof (stateid4));
   1920  10447    Thomas 		(void) atomic_swap_32(&lg->lor_reply, 0);
   1921   6741  th199096 		break;
   1922   6741  th199096 
   1923   6741  th199096 	case LAYOUTRECALL4_FSID:
   1924   7397      rick 		cblrap->clora_recall.layoutrecall4_u.lor_fsid = lorec->lor_fsid;
   1925   6741  th199096 		break;
   1926   6741  th199096 
   1927   6741  th199096 	case LAYOUTRECALL4_ALL:
   1928   6741  th199096 	default:
   1929   6741  th199096 		break;
   1930   6741  th199096 	}
   1931   6741  th199096 
   1932   6741  th199096 	/*
   1933   6741  th199096 	 * Set up the timeout for the callback and make the actual call.
   1934   6741  th199096 	 * Timeout will be 80% of the lease period.
   1935   6741  th199096 	 */
   1936  10448    Thomas 	timeout.tv_sec = (dbe_to_instp(sp->sn_dbe)->lease_period * 80) / 100;
   1937   6741  th199096 	timeout.tv_usec = 0;
   1938   8041      rick retry:
   1939  10448    Thomas 	ch = rfs41_cb_getch(sp);
   1940   7397      rick 	(void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch);
   1941   7397      rick 	call_stat = clnt_call(ch, CB_COMPOUND,
   1942   7397      rick 	    xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args,
   1943   7397      rick 	    xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout);
   1944  10448    Thomas 	rfs41_cb_freech(sp, ch);
   1945   6741  th199096 
   1946   7397      rick 	if (call_stat != RPC_SUCCESS) {
   1947   8041      rick 		switch (lorec->lor_type) {
   1948   8041      rick 		case LAYOUTRECALL4_FILE:
   1949   8041      rick 			if (!retried)
   1950   8041      rick 				delay(SEC_TO_TICK(rfs4_lease_time));
   1951   8041      rick 
   1952  10447    Thomas 			if (rfs41_lo_still_granted(lg)) {
   1953   8041      rick 				if (!retried) {
   1954   8041      rick 					retried = 1;
   1955   8041      rick 					goto retry;
   1956   8041      rick 				}
   1957   8041      rick 
   1958   8041      rick 				/*
   1959   8041      rick 				 * We want to make sure that the layout is
   1960   8041      rick 				 * still granted lest we assert a SEQ4 flag
   1961   8041      rick 				 * that will never be turned off.
   1962   8041      rick 				 */
   1963  10447    Thomas 				rfs41_revoke_layout(lg);
   1964   8041      rick 			}
   1965   8041      rick 			sc = (call_stat == RPC_CANTSEND ||
   1966   8041      rick 			    call_stat == RPC_CANTRECV);
   1967  10448    Thomas 			rfs41_cb_path_down(sp, sc);
   1968   8041      rick 			goto done;
   1969   8041      rick 
   1970   8041      rick 		case LAYOUTRECALL4_FSID:
   1971   8041      rick 		case LAYOUTRECALL4_ALL:
   1972  10448    Thomas 			sp->sn_clnt->rc_bulk_recall = 0;
   1973   8041      rick 			/*
   1974   8041      rick 			 * XXX - how do we determine if layouts still
   1975   8041      rick 			 *	 outstanding for fsid/all cases ?
   1976   8041      rick 			 */
   1977   8041      rick 		default:
   1978   8041      rick 			break;
   1979   8041      rick 		}
   1980   8041      rick 
   1981   8041      rick 	} else {	/* RPC_SUCCESS */
   1982   8041      rick 
   1983   7397      rick 		/*
   1984   8041      rick 		 * Per-type results processing
   1985   7397      rick 		 */
   1986   8041      rick 		switch (lorec->lor_type) {
   1987   8041      rick 		case LAYOUTRECALL4_FILE:
   1988  10447    Thomas 			(void) atomic_swap_32(&lg->lor_reply, 1);
   1989   8041      rick 			break;
   1990   6741  th199096 
   1991   8041      rick 		case LAYOUTRECALL4_FSID:
   1992   8041      rick 		case LAYOUTRECALL4_ALL:
   1993   8041      rick 		default:
   1994   8041      rick 			break;
   1995   8041      rick 		}
   1996   8041      rick 	}
   1997   8041      rick 
   1998   8041      rick 	if (cb4_res.status != NFS4_OK) {
   1999   8041      rick 		nfsstat4	s = cb4_res.status;
   2000   8041      rick 
   2001   8041      rick 		switch (s) {
   2002   8041      rick 		case NFS4ERR_BADHANDLE:
   2003   8041      rick 		case NFS4ERR_BADIOMODE:
   2004   8041      rick 		case NFS4ERR_BADXDR:
   2005   8041      rick 		case NFS4ERR_INVAL:
   2006   8041      rick 		case NFS4ERR_NOMATCHING_LAYOUT:
   2007   8041      rick 		case NFS4ERR_NOTSUPP:
   2008   8041      rick 		case NFS4ERR_OP_NOT_IN_SESSION:
   2009   8041      rick 		case NFS4ERR_REP_TOO_BIG:
   2010   8041      rick 		case NFS4ERR_REP_TOO_BIG_TO_CACHE:
   2011   8041      rick 		case NFS4ERR_REQ_TOO_BIG:
   2012   8041      rick 		case NFS4ERR_TOO_MANY_OPS:
   2013   8041      rick 		case NFS4ERR_UNKNOWN_LAYOUTTYPE:
   2014   8041      rick 		case NFS4ERR_WRONG_TYPE:
   2015   8041      rick 			/* What do we do when it's our own fault ? */
   2016   8041      rick 			cmn_err(CE_NOTE, "cb_lo_recall: %s", nfs41_strerror(s));
   2017   8041      rick 			break;
   2018   8041      rick 
   2019   8041      rick 		case NFS4ERR_DELAY:
   2020   8041      rick 			switch (lorec->lor_type) {
   2021   8041      rick 			case LAYOUTRECALL4_FILE:
   2022   8041      rick 				{
   2023   8041      rick 				bool_t	granted = FALSE;
   2024   8041      rick 
   2025   8041      rick 				if (!retried)
   2026   8041      rick 					delay(SEC_TO_TICK(rfs4_lease_time));
   2027   8041      rick 
   2028  10447    Thomas 				granted = rfs41_lo_still_granted(lg);
   2029   8041      rick 				if (!granted)
   2030   8041      rick 					break;
   2031   8041      rick 
   2032   8041      rick 				if (!retried) {
   2033   8041      rick 					retried = 1;
   2034   8041      rick 					goto retry;
   2035   8041      rick 				}
   2036   8041      rick 
   2037   8041      rick 				if (granted)
   2038  10447    Thomas 					rfs41_revoke_layout(lg);
   2039   8041      rick 				break;
   2040   8041      rick 				}
   2041   8041      rick 
   2042   8041      rick 			case LAYOUTRECALL4_FSID:
   2043   8041      rick 			case LAYOUTRECALL4_ALL:
   2044   8041      rick 			default:
   2045   8041      rick 				break;
   2046   8041      rick 			}
   2047   8041      rick 			break;
   2048   8041      rick 
   2049   8041      rick 		case NFS4ERR_BAD_STATEID:	/* XXX - retry BAD_STATEID ? */
   2050   8041      rick 		default:
   2051   8041      rick 			if (lorec->lor_type == LAYOUTRECALL4_FILE)
   2052  10447    Thomas 				if (rfs41_lo_still_granted(lg))
   2053  10447    Thomas 					rfs41_revoke_layout(lg);
   2054   8041      rick 			break;
   2055   8041      rick 		}
   2056   7397      rick 
   2057   6741  th199096 	}
   2058   7397      rick 	svc_slot_cb_seqid(&cb4_res, p);
   2059   7397      rick done:
   2060   6741  th199096 	kmem_free(lorec, sizeof (mds_lorec_t));
   2061   8041      rick 	rfs4freeargres(&cb4_args, &cb4_res);
   2062   8041      rick 
   2063  10448    Thomas 	svc_slot_free(sp, p);
   2064  10448    Thomas 	rfs41_session_rele(sp);
   2065   8041      rick 
   2066   8041      rick 	/*
   2067   8041      rick 	 * Per-type post-processing
   2068   8041      rick 	 */
   2069   8041      rick 	switch (lorec->lor_type) {
   2070   8041      rick 	case LAYOUTRECALL4_FILE:
   2071  10447    Thomas 		rfs41_lo_grant_rele(lg);
   2072   8041      rick 		break;
   2073   8041      rick 
   2074   8041      rick 	case LAYOUTRECALL4_FSID:
   2075   8041      rick 	case LAYOUTRECALL4_ALL:
   2076   8041      rick 	default:
   2077   8041      rick 		break;
   2078   8041      rick 	}
   2079   6741  th199096 }
   2080   6741  th199096 
   2081   8041      rick /*
   2082   8041      rick  * Bulk Layout Recall (ALL)
   2083   8041      rick  */
   2084   6741  th199096 static void
   2085   8041      rick all_lor(rfs4_entry_t entry, void *args)
   2086   8041      rick {
   2087  10448    Thomas 	mds_session_t	*sp = (mds_session_t *)entry;
   2088   8041      rick 	mds_lorec_t	*lrp = (mds_lorec_t *)args;
   2089   8041      rick 	mds_lorec_t	*lorec;
   2090   8041      rick 
   2091  10448    Thomas 	if (sp == NULL || lrp == NULL)
   2092  10448    Thomas 		return;
   2093  10448    Thomas 
   2094  10448    Thomas 	ASSERT(rfs4_dbe_islocked(sp->sn_dbe));
   2095   8041      rick 	lorec = kmem_zalloc(sizeof (mds_lorec_t), KM_SLEEP);
   2096   8041      rick 	bcopy(args, lorec, sizeof (mds_lorec_t));
   2097   8041      rick 
   2098  10448    Thomas 	rfs4_dbe_hold(sp->sn_dbe);
   2099  10448    Thomas 	lorec->lor_sess = sp;
   2100   8041      rick 
   2101   8041      rick 	(void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, TS_RUN,
   2102   8041      rick 	    minclsyspri);
   2103   8041      rick }
   2104   8041      rick 
   2105   8041      rick /*
   2106   8041      rick  * Layout Recall by FSID
   2107   8041      rick  */
   2108   8041      rick static void
   2109  10016    Thomas fsid_lor(rfs4_entry_t u_entry, void *args)
   2110   8041      rick {
   2111   8041      rick 	mds_lorec_t		*lrp = (mds_lorec_t *)args;
   2112  10447    Thomas 	mds_ever_grant_t	*eg = (mds_ever_grant_t *)u_entry;
   2113  10447    Thomas 	mds_ever_grant_t	key;
   2114   8041      rick 	vnode_t			*vp = NULL;
   2115   8041      rick 
   2116  10447    Thomas 	if (eg == NULL || lrp == NULL || rfs4_dbe_is_invalid(eg->eg_dbe))
   2117  10447    Thomas 		return;
   2118  10447    Thomas 
   2119  10447    Thomas 	ASSERT(rfs4_dbe_islocked(eg->eg_dbe));
   2120   8041      rick 	if ((vp = (vnode_t *)lrp->lor_vp) == NULL)
   2121   8041      rick 		return;
   2122   8041      rick 
   2123  10447    Thomas 	key.eg_fsid = vp->v_vfsp->vfs_fsid;
   2124  10016    Thomas 	if (mds_ever_grant_fsid_compare(u_entry,
   2125  10447    Thomas 	    (void *)(uintptr_t)key.eg_key)) {
   2126   8041      rick 		mds_lorec_t	*lorec;
   2127  10448    Thomas 		mds_session_t	*sp;
   2128  10016    Thomas 		nfs_server_instance_t	*instp;
   2129  10016    Thomas 
   2130  10016    Thomas 		instp = dbe_to_instp(u_entry->dbe);
   2131   8041      rick 
   2132   8041      rick 		lorec = kmem_zalloc(sizeof (mds_lorec_t), KM_SLEEP);
   2133   8041      rick 		bcopy(args, lorec, sizeof (mds_lorec_t));
   2134   8041      rick 
   2135  10447    Thomas 		ASSERT(eg->eg_cp != NULL);
   2136  10448    Thomas 		sp = mds_findsession_by_clid(instp, eg->eg_cp->rc_clientid);
   2137  10448    Thomas 		if (sp == NULL) {
   2138   8041      rick 			kmem_free(lorec, sizeof (mds_lorec_t));
   2139   8041      rick 			return;
   2140   8041      rick 		}
   2141  10448    Thomas 		lorec->lor_sess = sp;	/* hold courtesy of findsession */
   2142   8041      rick 
   2143   8041      rick 		(void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0,
   2144   8041      rick 		    TS_RUN, minclsyspri);
   2145   8041      rick 	}
   2146   8041      rick }
   2147   8041      rick 
   2148   8041      rick /*
   2149   8041      rick  * Layout Recall by File
   2150   8041      rick  */
   2151   8041      rick static void
   2152   8041      rick file_lor(rfs4_entry_t entry, void *arg)
   2153   6741  th199096 {
   2154   6741  th199096 	mds_lorec_t *lorec;
   2155   6741  th199096 
   2156   6741  th199096 	lorec = kmem_alloc(sizeof (mds_lorec_t), KM_SLEEP);
   2157   6741  th199096 	bcopy(arg, lorec, sizeof (mds_lorec_t));
   2158   8041      rick 	lorec->lor_sess = (mds_session_t *)entry;
   2159   6741  th199096 
   2160   8041      rick 	(void) thread_create(NULL, 0, mds_do_lorecall, lorec, 0, &p0, TS_RUN,
   2161   8041      rick 	    minclsyspri);
   2162   7739   jwahlig }
   2163   7739   jwahlig 
   2164   8312   webaker 
   2165   6741  th199096 /*
   2166   7739   jwahlig  * Recall a layout:
   2167   7739   jwahlig  *
   2168   7739   jwahlig  *   Either all layouts
   2169   7739   jwahlig  *
   2170   7739   jwahlig  *   ... or
   2171   7739   jwahlig  *
   2172   7739   jwahlig  *   For a given pathname construct FH first (same thing we do
   2173   7739   jwahlig  *   for nfs_sys(GETFH)) args have already been copied into kernel
   2174   7739   jwahlig  *   adspace
   2175   6741  th199096  */
   2176   6741  th199096 static int
   2177   6741  th199096 mds_lorecall_cmd(struct mds_reclo_args *args, cred_t *cr)
   2178   6741  th199096 {
   2179   8041      rick 	int			 error;
   2180   8041      rick 	nfs_fh4			 fh4;
   2181   8041      rick 	struct exportinfo	*exi;
   2182   8041      rick 	mds_lorec_t		 lorec;
   2183   8041      rick 	vnode_t			*vp = NULL;
   2184   8041      rick 	vnode_t			*dvp = NULL;
   2185   8041      rick 	rfs4_file_t		*fp = NULL;
   2186   8041      rick 	rfs4_client_t		*cp = NULL;
   2187   8041      rick 	rfs41_grant_list_t	*glp = NULL;
   2188  10448    Thomas 	mds_session_t		*sp = NULL;
   2189   8041      rick 
   2190   8041      rick 	lorec.lor_type = args->lo_type;
   2191   8041      rick 	switch (args->lo_type) {
   2192   8041      rick 	case LAYOUTRECALL4_ALL:
   2193   8041      rick 		if (mds_server->mds_session_tab == NULL)
   2194   8041      rick 			return (ECANCELED);
   2195   8041      rick 
   2196   8041      rick 		rfs4_dbe_walk(mds_server->mds_session_tab, all_lor, &lorec);
   2197   8041      rick 		return (0);
   2198   8041      rick 
   2199   8041      rick 	case LAYOUTRECALL4_FILE:
   2200   8041      rick 	case LAYOUTRECALL4_FSID:
   2201   8041      rick 		break;
   2202   8041      rick 
   2203   8041      rick 	default:
   2204   8041      rick 		return (EINVAL);
   2205   8041      rick 	}
   2206   8041      rick 
   2207   8041      rick 	if (error = lookupname(args->lo_fname, UIO_SYSSPACE, FOLLOW, &dvp, &vp))
   2208   8041      rick 		return (error);
   2209   8041      rick 
   2210   8041      rick 	if (vp == NULL) {
   2211   8041      rick 		if (dvp != NULL)
   2212   8041      rick 			VN_RELE(dvp);
   2213   8041      rick 		return (ENOENT);
   2214   8041      rick 	}
   2215   6741  th199096 
   2216   7397      rick 	/*
   2217   8041      rick 	 * 'vp' may be an AUTOFS node, so we perform a VOP_ACCESS()
   2218   8041      rick 	 * to trigger the mount of the intended filesystem, so we
   2219   8041      rick 	 * can share the intended filesystem instead of the AUTOFS
   2220   8041      rick 	 * filesystem.
   2221   6741  th199096 	 */
   2222   6741  th199096 	(void) VOP_ACCESS(vp, 0, 0, cr, NULL);
   2223   6741  th199096 
   2224   6741  th199096 	/*
   2225   8041      rick 	 * We're interested in the top most filesystem. This is
   2226   8041      rick 	 * specially important when uap->dname is a trigger AUTOFS
   2227   8041      rick 	 * node, since we're really interested in sharing the
   2228   6741  th199096 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
   2229   8041      rick 	 * call, not the AUTOFS node itself.
   2230   6741  th199096 	 */
   2231   6741  th199096 	if (vn_mountedvfs(vp) != NULL) {
   2232   6741  th199096 		if (error = traverse(&vp))
   2233   6741  th199096 			goto errout;
   2234   6741  th199096 	}
   2235   6741  th199096 
   2236   6741  th199096 	/*
   2237   8041      rick 	 * The last arg for nfs_vptoexi says to create a v4 FH
   2238   8041      rick 	 * (instead of v3). This will need to be changed to
   2239   8041      rick 	 * select the new MDS FH format.
   2240   6741  th199096 	 */
   2241   6741  th199096 	rw_enter(&exported_lock, RW_READER);
   2242   6741  th199096 	exi = nfs_vptoexi(dvp, vp, cr, NULL, &error, TRUE);
   2243   6741  th199096 	rw_exit(&exported_lock);
   2244   6741  th199096 
   2245   6741  th199096 	/*
   2246   6741  th199096 	 * file isn't shared.
   2247   6741  th199096 	 */
   2248   6741  th199096 	if (exi == NULL)
   2249   6741  th199096 		goto errout;
   2250   6741  th199096 
   2251   6741  th199096 	fh4.nfs_fh4_val = lorec.lor_fh.fh_buf;
   2252   6741  th199096 	error = mknfs41_fh(&fh4, vp, exi);
   2253   6741  th199096 	lorec.lor_fh.fh_len = fh4.nfs_fh4_len;
   2254   6741  th199096 	lorec.lor_sess = NULL;
   2255   6741  th199096 
   2256   8041      rick 	switch (lorec.lor_type) {
   2257   8041      rick 	case LAYOUTRECALL4_FILE:
   2258  10462    Thomas 		mutex_enter(&vp->v_vsd_lock);
   2259   8041      rick 		fp = (rfs4_file_t *)vsd_get(vp, mds_server->vkey);
   2260  10462    Thomas 		mutex_exit(&vp->v_vsd_lock);
   2261   8041      rick 		if (fp == NULL) {
   2262   8041      rick 			error = EIO;
   2263   8041      rick 			goto errout;
   2264   8041      rick 		}
   2265   6741  th199096 
   2266   8041      rick 		/*
   2267   8041      rick 		 * There may be a cleaner way to run the per-file lists,
   2268   8041      rick 		 * but this works for now. This sends a cb_lo_recall to
   2269   8041      rick 		 * the clients that have an active layout for the file,
   2270   8041      rick 		 * only. Stop the blasting !
   2271   8041      rick 		 */
   2272  10447    Thomas 		glp = fp->rf_lo_grant_list.next;
   2273  10447    Thomas 		for (; glp && glp->lg; glp = glp->next) {
   2274  10447    Thomas 
   2275  10447    Thomas 			if ((cp = glp->lg->lo_cp) == NULL)
   2276   8041      rick 				continue;	/* internal inconsistency ? */
   2277   6741  th199096 
   2278  10447    Thomas 			rfs41_lo_grant_hold(glp->lg);
   2279  10448    Thomas 			sp = mds_findsession_by_clid(mds_server,
   2280  10447    Thomas 			    cp->rc_clientid);
   2281  10448    Thomas 			if (sp != NULL) {
   2282   8041      rick 				/*
   2283   8041      rick 				 * Recall in progress !
   2284   8041      rick 				 *
   2285   8041      rick 				 * As per spec rules, bump up the seqid (of
   2286   8041      rick 				 * the stateid) and make sure we store it in
   2287   8041      rick 				 * the layout grant info; this will eventually
   2288   8041      rick 				 * be used for layout race detection.
   2289   8041      rick 				 */
   2290  10447    Thomas 				rfs4_dbe_lock(glp->lg->lo_dbe);
   2291  10447    Thomas 
   2292  10447    Thomas 				glp->lg->lo_status = LO_RECALL_INPROG;
   2293  10447    Thomas 				rfs41_lo_seqid(&glp->lg->lo_stateid);
   2294  10447    Thomas 
   2295  10447    Thomas 				mutex_enter(&glp->lg->lo_lock);
   2296  10447    Thomas 				glp->lg->lor_seqid =
   2297  10447    Thomas 				    glp->lg->lo_stateid.v41_bits.chgseq;
   2298  10447    Thomas 				mutex_exit(&glp->lg->lo_lock);
   2299  10447    Thomas 
   2300  10447    Thomas 				bcopy(&glp->lg->lo_stateid.stateid,
   2301   8041      rick 				    &lorec.lor_stid, sizeof (stateid4));
   2302  10447    Thomas 				lorec.lor_lg = glp->lg;
   2303  10447    Thomas 				rfs41_lo_grant_hold(glp->lg);
   2304  10447    Thomas 
   2305  10447    Thomas 				rfs4_dbe_unlock(glp->lg->lo_dbe);
   2306  10448    Thomas 				file_lor((rfs4_entry_t)sp, (void *)&lorec);
   2307  10447    Thomas 			}
   2308  10447    Thomas 			rfs41_lo_grant_rele(glp->lg);
   2309   8041      rick 		}
   2310   8041      rick 		break;
   2311   8041      rick 
   2312   8041      rick 	case LAYOUTRECALL4_FSID:
   2313   8041      rick 		/*
   2314   8041      rick 		 * set fsid just like rfs4_fattr4_fsid()
   2315   8041      rick 		 */
   2316   8041      rick 		if (exi->exi_volatile_dev) {
   2317   8041      rick 			int *pmaj = (int *)&lorec.lor_fsid.major;
   2318   8041      rick 
   2319   8041      rick 			pmaj[0] = exi->exi_fsid.val[0];
   2320   8041      rick 			pmaj[1] = exi->exi_fsid.val[1];
   2321   8041      rick 			lorec.lor_fsid.minor = 0;
   2322   8041      rick 		} else {
   2323   8041      rick 			vattr_t va;
   2324   8041      rick 
   2325   8041      rick 			va.va_mask = AT_FSID | AT_TYPE;
   2326   8041      rick 			error = rfs4_vop_getattr(vp, &va, 0, cr);
   2327   8041      rick 
   2328   8041      rick 			if (error == 0 && va.va_type != VREG)
   2329   8041      rick 				error = EINVAL;
   2330   8041      rick 			if (error)
   2331   8041      rick 				goto errout;
   2332   8041      rick 
   2333   8041      rick 			lorec.lor_fsid.major = getmajor(va.va_fsid);
   2334   8041      rick 			lorec.lor_fsid.minor = getminor(va.va_fsid);
   2335   8041      rick 		}
   2336   8041      rick 
   2337   8041      rick 		if (mds_server->mds_ever_grant_tab == NULL) {
   2338   8041      rick 			error = ECANCELED;
   2339   6741  th199096 			goto errout;
   2340   8041      rick 		}
   2341   6741  th199096 
   2342   8041      rick 		lorec.lor_vp = vp;
   2343   8041      rick 		VN_HOLD(vp);
   2344   8041      rick 		rfs4_dbe_walk(mds_server->mds_ever_grant_tab, fsid_lor, &lorec);
   2345   8041      rick 		VN_RELE(vp);
   2346   8041      rick 		break;
   2347   8041      rick 
   2348   8041      rick 	default:
   2349   8041      rick 		break;
   2350   6741  th199096 	}
   2351   6741  th199096 
   2352   6741  th199096 errout:
   2353   6741  th199096 	VN_RELE(vp);
   2354   6741  th199096 	if (dvp != NULL)
   2355   6741  th199096 		VN_RELE(dvp);
   2356   6741  th199096 	return (error);
   2357   8312   webaker }
   2358   8312   webaker 
   2359   8312   webaker /* support for device notifications via mdsadm */
   2360   8312   webaker 
   2361   8312   webaker typedef struct mds_notify_device {
   2362   8312   webaker 	mds_session_t			*nd_sess;
   2363   8312   webaker 	struct mds_notifydev_args	 nd_args;
   2364   8312   webaker 
   2365   8312   webaker } mds_notify_device_t;
   2366   8312   webaker 
   2367   8312   webaker static void
   2368   8312   webaker mds_do_notify_device(mds_notify_device_t *ndp)
   2369   8312   webaker {
   2370   8312   webaker 	CB_COMPOUND4args	 cb4_args;
   2371   8312   webaker 	CB_COMPOUND4res		 cb4_res;
   2372   8312   webaker 	CB_SEQUENCE4args	*cbsap;
   2373   8312   webaker 	CB_NOTIFY_DEVICEID4args *cbndap;
   2374   8312   webaker 	nfs_cb_argop4		*argops;
   2375   8312   webaker 	struct timeval		 timeout;
   2376   8312   webaker 	enum clnt_stat		 call_stat = RPC_FAILED;
   2377   8312   webaker 	int			 zilch = 0;
   2378   8312   webaker 	CLIENT			*ch;
   2379   8312   webaker 	int			 numops;
   2380   8312   webaker 	int			 argsz;
   2381  10448    Thomas 	mds_session_t		*sp;
   2382   8312   webaker 	slot_ent_t		*p;
   2383   8312   webaker 	notify4			 no;
   2384   8312   webaker 	char			*xdr_buf = NULL;
   2385   8312   webaker 	int			 xdr_size;
   2386   8312   webaker 	XDR			 xdr;
   2387   8312   webaker 
   2388   8312   webaker 	DTRACE_PROBE1(nfssrv__i__sess_notify_device, mds_notify_device_t *,
   2389   8312   webaker 	    ndp);
   2390   8312   webaker 
   2391   8312   webaker 	if (ndp->nd_sess == NULL)
   2392   8312   webaker 		return;
   2393  10448    Thomas 	sp = ndp->nd_sess;
   2394   8312   webaker 
   2395   8312   webaker 	/*
   2396   8312   webaker 	 * XXX - until we fix blasting _all_ sessions for one notification,
   2397   8312   webaker 	 *	make sure that the session in question at least has the
   2398   8312   webaker 	 *	back chan established.
   2399   8312   webaker 	 */
   2400  10448    Thomas 	if (!SN_CB_CHAN_EST(sp))
   2401   8312   webaker 		return;
   2402   8312   webaker 
   2403   8312   webaker 	/*
   2404   8312   webaker 	 * set up the compound args
   2405   8312   webaker 	 */
   2406   8312   webaker 	numops = 2;	/* CB_SEQUENCE + CB_NOTIFY_DEVICE */
   2407   8312   webaker 	argsz = numops * sizeof (nfs_cb_argop4);
   2408   8312   webaker 	argops = kmem_zalloc(argsz, KM_SLEEP);
   2409   8312   webaker 
   2410   8312   webaker 	argops[0].argop = OP_CB_SEQUENCE;
   2411   8312   webaker 	cbsap = &argops[0].nfs_cb_argop4_u.opcbsequence;
   2412   8312   webaker 
   2413   8312   webaker 	argops[1].argop = OP_CB_NOTIFY_DEVICEID;
   2414   8312   webaker 	cbndap = &argops[1].nfs_cb_argop4_u.opcbnotify_deviceid;
   2415   8312   webaker 
   2416   8312   webaker 	(void) str_to_utf8("cb_notify_device", &cb4_args.tag);
   2417   8312   webaker 	cb4_args.minorversion = CB4_MINOR_v1;
   2418   8312   webaker 
   2419  10448    Thomas 	cb4_args.callback_ident = sp->sn_bc.progno;
   2420   8312   webaker 	cb4_args.array_len = numops;
   2421   8312   webaker 	cb4_args.array = argops;
   2422   8312   webaker 
   2423   8312   webaker 	cb4_res.tag.utf8string_val = NULL;
   2424   8312   webaker 	cb4_res.array = NULL;
   2425   8312   webaker 
   2426   8312   webaker 	/*
   2427   8312   webaker 	 * CB_SEQUENCE
   2428   8312   webaker 	 */
   2429  10448    Thomas 	bcopy(sp->sn_sessid, cbsap->csa_sessionid, sizeof (sessionid4));
   2430  10448    Thomas 	p = svc_slot_alloc(sp);
   2431   8312   webaker 	mutex_enter(&p->se_lock);
   2432   8312   webaker 	cbsap->csa_slotid = p->se_sltno;
   2433   8312   webaker 	cbsap->csa_sequenceid = p->se_seqid;
   2434  10448    Thomas 	cbsap->csa_highest_slotid = svc_slot_maxslot(sp);
   2435   8312   webaker 	cbsap->csa_cachethis = FALSE;
   2436   8312   webaker 
   2437   8312   webaker 	/* no referring calling list for device notifications */
   2438   8312   webaker 	cbsap->csa_rcall_llen = 0;
   2439   8312   webaker 	cbsap->csa_rcall_lval = NULL;
   2440   8312   webaker 	mutex_exit(&p->se_lock);
   2441   8312   webaker 
   2442   8312   webaker 	/*
   2443   8312   webaker 	 * CB_NOTIFY_DEVICEID (well, d'uh)
   2444   8312   webaker 	 */
   2445   8312   webaker 	cbndap->cnda_changes.cnda_changes_len = 1;
   2446   8312   webaker 	cbndap->cnda_changes.cnda_changes_val = &no;
   2447   8312   webaker 	if (ndp->nd_args.notify_how == NOTIFY_DEVICEID4_DELETE) {
   2448   8312   webaker 		notify_deviceid_delete4 nodd;
   2449   8312   webaker 
   2450   8312   webaker 		no.notify_mask = NOTIFY_DEVICEID4_DELETE_MASK;
   2451   8312   webaker 		nodd.ndd_layouttype = LAYOUT4_NFSV4_1_FILES;
   2452   9209      Jeff 		(void) memset(&nodd.ndd_deviceid, 0, sizeof (deviceid4));
   2453   8312   webaker 		bcopy(&ndp->nd_args.dev_id, &nodd.ndd_deviceid,
   2454   8312   webaker 		    sizeof (ndp->nd_args.dev_id));
   2455   8312   webaker 
   2456   8312   webaker 		/* encode the notification blob */
   2457   8312   webaker 
   2458   8312   webaker 		xdr_size = xdr_sizeof(xdr_notify_deviceid_delete4, &nodd);
   2459   8312   webaker 		ASSERT(xdr_size);
   2460   8312   webaker 		xdr_buf = kmem_alloc(xdr_size, KM_SLEEP);
   2461   8312   webaker 		xdrmem_create(&xdr, xdr_buf, xdr_size, XDR_ENCODE);
   2462   8312   webaker 
   2463   8312   webaker 		if (xdr_notify_deviceid_delete4(&xdr, &nodd) == FALSE)
   2464   8312   webaker 			goto done;
   2465   8312   webaker 
   2466   8312   webaker 		/*
   2467   8312   webaker 		 * Once the blob is encoded, we no longer need
   2468   8312   webaker 		 * nodd, which goes out of scope here.
   2469   8312   webaker 		 */
   2470   8312   webaker 
   2471   8312   webaker 	} else {
   2472   8312   webaker 		notify_deviceid_change4 nodc;
   2473   8312   webaker 
   2474   8312   webaker 		no.notify_mask = NOTIFY_DEVICEID4_CHANGE_MASK;
   2475   8312   webaker 		nodc.ndc_layouttype = LAYOUT4_NFSV4_1_FILES;
   2476   9209      Jeff 		(void) memset(&nodc.ndc_deviceid, 0, sizeof (deviceid4));
   2477   8312   webaker 		bcopy(&ndp->nd_args.dev_id, &nodc.ndc_deviceid,
   2478   8312   webaker 		    sizeof (ndp->nd_args.dev_id));
   2479   8312   webaker 
   2480   8312   webaker 		xdr_size = xdr_sizeof(xdr_notify_deviceid_change4, &nodc);
   2481   8312   webaker 		ASSERT(xdr_size);
   2482   8312   webaker 		xdr_buf = kmem_alloc(xdr_size, KM_SLEEP);
   2483   8312   webaker 		xdrmem_create(&xdr, xdr_buf, xdr_size, XDR_ENCODE);
   2484   8312   webaker 
   2485   8312   webaker 		if (xdr_notify_deviceid_change4(&xdr, &nodc) == FALSE) {
   2486   8312   webaker 			kmem_free(xdr_buf, xdr_size);
   2487   8312   webaker 			xdr_size = 0;
   2488   8312   webaker 			xdr_buf = NULL;
   2489   8312   webaker 		}
   2490   8312   webaker 	}
   2491   8312   webaker 
   2492   8312   webaker 	no.notify_vals.notifylist4_len = xdr_size;
   2493   8312   webaker 	no.notify_vals.notifylist4_val = xdr_buf;
   2494   8312   webaker 
   2495   8312   webaker 	/*
   2496   8312   webaker 	 * Set up the timeout for the callback and make the actual call.
   2497   8312   webaker 	 * Timeout will be 80% of the lease period.
   2498   8312   webaker 	 */
   2499   8312   webaker 	timeout.tv_sec =
   2500  10448    Thomas 	    (dbe_to_instp(sp->sn_dbe)->lease_period * 80) / 100;
   2501   8312   webaker 	timeout.tv_usec = 0;
   2502   8312   webaker 
   2503  10448    Thomas 	ch = rfs41_cb_getch(sp);
   2504   8312   webaker 	(void) CLNT_CONTROL(ch, CLSET_XID, (char *)&zilch);
   2505   8312   webaker 	call_stat = clnt_call(ch, CB_COMPOUND,
   2506   8312   webaker 	    xdr_CB_COMPOUND4args_srv, (caddr_t)&cb4_args,
   2507   8312   webaker 	    xdr_CB_COMPOUND4res, (caddr_t)&cb4_res, timeout);
   2508  10448    Thomas 	rfs41_cb_freech(sp, ch);
   2509   8312   webaker 
   2510   8312   webaker 	/*
   2511   8312   webaker 	 * Errors from the client are harmless for now, since this
   2512   8312   webaker 	 * is invoked by an administrative action for testing purposes.
   2513   8312   webaker 	 * In the future, if this were part of the normal server action,
   2514   8312   webaker 	 * these errors would need to be handled.
   2515   8312   webaker 	 */
   2516   8312   webaker 	if (call_stat != RPC_SUCCESS) {
   2517   8312   webaker 		cmn_err(CE_NOTE, "mds_do_notify_device: RPC call failed %d",
   2518   8312   webaker 		    call_stat);
   2519   8312   webaker 		goto done;
   2520   8312   webaker 
   2521   8312   webaker 	} else if (cb4_res.status != NFS4_OK) {
   2522   8312   webaker 		cmn_err(CE_NOTE, "mds_do_notify_device: compound failed %d",
   2523   8312   webaker 		    cb4_res.status);
   2524   8312   webaker 
   2525   8312   webaker 	}
   2526   8312   webaker 	svc_slot_cb_seqid(&cb4_res, p);
   2527   8312   webaker 	xdr_free(xdr_CB_COMPOUND4res, (caddr_t)&cb4_res);
   2528   8312   webaker done:
   2529   8312   webaker 	kmem_free(cb4_args.tag.utf8string_val, cb4_args.tag.utf8string_len);
   2530   8312   webaker 	kmem_free(argops, argsz);
   2531   8312   webaker 	kmem_free(ndp, sizeof (*ndp));
   2532   8312   webaker 	if (xdr_buf)
   2533   8312   webaker 		kmem_free(xdr_buf, xdr_size);
   2534  10448    Thomas 	svc_slot_free(sp, p);
   2535   8312   webaker }
   2536   8312   webaker 
   2537   8312   webaker static void
   2538   8312   webaker mds_sess_notify_device_callout(rfs4_entry_t u_entry, void *arg)
   2539   8312   webaker {
   2540   8312   webaker 	mds_notify_device_t *ndp;
   2541   8312   webaker 
   2542   8312   webaker 	ndp = kmem_alloc(sizeof (*ndp), KM_SLEEP);
   2543   8312   webaker 	bcopy(arg, &ndp->nd_args, sizeof (ndp->nd_args));
   2544   8312   webaker 	ndp->nd_sess = (mds_session_t *)u_entry;
   2545   8312   webaker 
   2546   8312   webaker 	(void) thread_create(NULL, 0, mds_do_notify_device, ndp, 0, &p0,
   2547   8312   webaker 	    TS_RUN, minclsyspri);
   2548   8312   webaker }
   2549   8312   webaker 
   2550   8312   webaker void
   2551   8312   webaker inst_notify_device(nfs_server_instance_t *instp, void *args)
   2552   8312   webaker {
   2553   8312   webaker 	if (instp->mds_session_tab != NULL)
   2554   8312   webaker 		rfs4_dbe_walk(instp->mds_session_tab,
   2555   8312   webaker 		    mds_sess_notify_device_callout, args);
   2556   8312   webaker }
   2557   8312   webaker 
   2558   8312   webaker /*ARGSUSED*/
   2559   8312   webaker static int
   2560   8312   webaker mds_notify_device_cmd(struct mds_notifydev_args *args, cred_t *cr)
   2561   8312   webaker {
   2562   8312   webaker 	/*
   2563   8312   webaker 	 * Walk the list of server instances, asking each
   2564   8312   webaker 	 * to notify the specified device.
   2565   8312   webaker 	 */
   2566   8312   webaker 	nsi_walk(inst_notify_device, args);
   2567   8312   webaker 	return (0);
   2568   6741  th199096 }
   2569   6741  th199096 
   2570   6741  th199096 /*
   2571   6741  th199096  * -----------------------------------------------
   2572   7739   jwahlig  * MDS: DS_ADDR tables.
   2573   6741  th199096  * -----------------------------------------------
   2574   6741  th199096  *
   2575   6741  th199096  */
   2576   6741  th199096 
   2577   6741  th199096 static uint32_t
   2578   7811    Thomas ds_addrlist_hash(void *key)
   2579   6741  th199096 {
   2580   6741  th199096 	return ((uint32_t)(uintptr_t)key);
   2581   6741  th199096 }
   2582   6741  th199096 
   2583   6741  th199096 static bool_t
   2584  10016    Thomas ds_addrlist_compare(rfs4_entry_t u_entry, void *key)
   2585  10016    Thomas {
   2586  10016    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)u_entry;
   2587   6741  th199096 
   2588   6741  th199096 	return (rfs4_dbe_getid(dp->dbe) == (int)(uintptr_t)key);
   2589   6741  th199096 }
   2590   6741  th199096 
   2591   6741  th199096 static void *
   2592   7811    Thomas ds_addrlist_mkkey(rfs4_entry_t entry)
   2593   6741  th199096 {
   2594   7811    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)entry;
   2595   6741  th199096 
   2596   6741  th199096 	return ((void *)(uintptr_t)rfs4_dbe_getid(dp->dbe));
   2597   6741  th199096 }
   2598   6741  th199096 
   2599   6741  th199096 /*ARGSUSED*/
   2600   6741  th199096 static bool_t
   2601   7811    Thomas ds_addrlist_create(rfs4_entry_t u_entry, void *arg)
   2602   6741  th199096 {
   2603   7811    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)u_entry;
   2604   6741  th199096 	struct mds_adddev_args *u_dp = (struct mds_adddev_args *)arg;
   2605   6741  th199096 
   2606   9404    Thomas 	dp->dev_addr.na_r_netid = kstrdup(u_dp->dev_netid);
   2607   9404    Thomas 	dp->dev_addr.na_r_addr = kstrdup(u_dp->dev_addr);
   2608   7739   jwahlig 	dp->ds_owner = NULL;
   2609   7740    Robert 	dp->dev_knc = NULL;
   2610   7740    Robert 	dp->dev_nb = NULL;
   2611   9407    Thomas 	dp->ds_addr_key = 0;
   2612   9407    Thomas 	dp->ds_port_key = 0;
   2613   9404    Thomas 
   2614   6741  th199096 	return (TRUE);
   2615   6741  th199096 }
   2616   6741  th199096 
   2617   6741  th199096 /*ARGSUSED*/
   2618   6741  th199096 static void
   2619  10016    Thomas ds_addrlist_destroy(rfs4_entry_t u_entry)
   2620  10016    Thomas {
   2621  10016    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)u_entry;
   2622   9404    Thomas 	int	i;
   2623  10016    Thomas 	nfs_server_instance_t	*instp;
   2624  10016    Thomas 
   2625  10016    Thomas 	instp = dbe_to_instp(u_entry->dbe);
   2626  10016    Thomas 
   2627  10016    Thomas 	rw_enter(&instp->ds_addrlist_lock, RW_WRITER);
   2628   9404    Thomas 	if (dp->ds_owner != NULL) {
   2629   9404    Thomas 		list_remove(&dp->ds_owner->ds_addrlist_list, dp);
   2630   9404    Thomas 		rfs4_dbe_rele(dp->ds_owner->dbe);
   2631   9404    Thomas 		dp->ds_owner = NULL;
   2632   9404    Thomas 	}
   2633  10016    Thomas 	rw_exit(&instp->ds_addrlist_lock);
   2634   9404    Thomas 
   2635   9404    Thomas 	if (dp->dev_addr.na_r_netid) {
   2636   9404    Thomas 		i = strlen(dp->dev_addr.na_r_netid) + 1;
   2637   9404    Thomas 		kmem_free(dp->dev_addr.na_r_netid, i);
   2638   9404    Thomas 	}
   2639   9404    Thomas 
   2640   9404    Thomas 	if (dp->dev_addr.na_r_addr) {
   2641   9404    Thomas 		i = strlen(dp->dev_addr.na_r_addr) + 1;
   2642   9404    Thomas 		kmem_free(dp->dev_addr.na_r_addr, i);
   2643   9404    Thomas 	}
   2644   7740    Robert 
   2645   7740    Robert 	if (dp->dev_knc != NULL)
   2646   7740    Robert 		kmem_free(dp->dev_knc, sizeof (struct knetconfig));
   2647   9404    Thomas 
   2648   7740    Robert 	if (dp->dev_nb != NULL) {
   2649   7740    Robert 		if (dp->dev_nb->buf)
   2650   7740    Robert 			kmem_free(dp->dev_nb->buf, dp->dev_nb->maxlen);
   2651   7740    Robert 		kmem_free(dp->dev_nb, sizeof (struct netbuf));
   2652   7740    Robert 	}
   2653   6741  th199096 }
   2654   6741  th199096 
   2655   6741  th199096 
   /*
    * Multipath devices.
    *
    * Hash callback: the key is an integer value carried in the pointer
    * itself; the hash is that value truncated to 32 bits.
    */
   static uint32_t
   mds_mpd_hash(void *key)
   {
   	uintptr_t id = (uintptr_t)key;

   	return ((uint32_t)id);
   }
   2664   6741  th199096 
   2665   6741  th199096 static bool_t
   2666  10016    Thomas mds_mpd_compare(rfs4_entry_t u_entry, void *key)
   2667  10016    Thomas {
   2668  10016    Thomas 	mds_mpd_t *mp = (mds_mpd_t *)u_entry;
   2669  10016    Thomas 
   2670  10016    Thomas 	return (mp->mpd_id == (id_t)(uintptr_t)key);
   2671  10016    Thomas }
   2672  10016    Thomas 
   2673  10016    Thomas static void *
   2674  10016    Thomas mds_mpd_mkkey(rfs4_entry_t u_entry)
   2675  10016    Thomas {
   2676  10016    Thomas 	mds_mpd_t *mp = (mds_mpd_t *)u_entry;
   2677  10016    Thomas 
   2678  10016    Thomas 	return ((void*)(uintptr_t)mp->mpd_id);
   2679   6741  th199096 }
   2680   6741  th199096 
   2681   6741  th199096 void
   2682   6741  th199096 mds_mpd_encode(nfsv4_1_file_layout_ds_addr4 *ds_dev, uint_t *len, char **val)
   2683   6741  th199096 {
   2684   6741  th199096 	char *xdr_ds_dev;
   2685   6741  th199096 	int  xdr_size = 0;
   2686   6741  th199096 	XDR  xdr;
   2687   6741  th199096 
   2688   6741  th199096 	ASSERT(val);
   2689   6741  th199096 
   2690   6741  th199096 	xdr_size = xdr_sizeof(xdr_nfsv4_1_file_layout_ds_addr4, ds_dev);
   2691   6741  th199096 
   2692   6741  th199096 	ASSERT(xdr_size);
   2693   6741  th199096 
   2694   6741  th199096 	xdr_ds_dev = kmem_alloc(xdr_size, KM_SLEEP);
   2695   6741  th199096 
   2696   6741  th199096 	xdrmem_create(&xdr, xdr_ds_dev, xdr_size, XDR_ENCODE);
   2697   6741  th199096 
   2698   6741  th199096 	if (xdr_nfsv4_1_file_layout_ds_addr4(&xdr, ds_dev) == FALSE) {
   2699   6741  th199096 		*len = 0;
   2700   6741  th199096 		*val = NULL;
   2701   6741  th199096 		kmem_free(xdr_ds_dev, xdr_size);
   2702   6741  th199096 		return;
   2703   6741  th199096 	}
   2704   6741  th199096 
   2705   6741  th199096 	*len = xdr_size;
   2706   6741  th199096 	*val = xdr_ds_dev;
   2707   6741  th199096 }
   2708   6741  th199096 
   2709   6741  th199096 /*ARGSUSED*/
   2710   6741  th199096 static bool_t
   2711   7739   jwahlig mds_mpd_create(rfs4_entry_t u_entry, void *arg)
   2712   6741  th199096 {
   2713  10016    Thomas 	mds_mpd_t *mp = (mds_mpd_t *)u_entry;
   2714   6741  th199096 	mds_addmpd_t *maap = (mds_addmpd_t *)arg;
   2715   6741  th199096 
   2716  10016    Thomas 	mp->mpd_id = maap->id;
   2717  10016    Thomas 	mds_mpd_encode(maap->ds_addr4, &(mp->mpd_encoded_len),
   2718  10016    Thomas 	    &(mp->mpd_encoded_val));
   2719  10016    Thomas 	list_create(&mp->mpd_layouts_list, sizeof (mds_layout_t),
   2720  10016    Thomas 	    offsetof(mds_layout_t, mpd_layouts_next));
   2721  10016    Thomas 
   2722  10016    Thomas 	return (TRUE);
   2723  10016    Thomas }
   2724  10016    Thomas 
   2725  10016    Thomas 
   2726  10016    Thomas /*ARGSUSED*/
   2727  10016    Thomas static void
   2728  10016    Thomas mds_mpd_destroy(rfs4_entry_t u_entry)
   2729  10016    Thomas {
   2730  10016    Thomas 	mds_mpd_t		*mp = (mds_mpd_t *)u_entry;
   2731  10016    Thomas 	nfs_server_instance_t	*instp;
   2732  10016    Thomas 
   2733  10016    Thomas 	instp = dbe_to_instp(u_entry->dbe);
   2734  10016    Thomas 	ASSERT(instp->mds_mpd_id_space != NULL);
   2735  10016    Thomas 	id_free(instp->mds_mpd_id_space, mp->mpd_id);
   2736  10016    Thomas 
   2737  10016    Thomas 	kmem_free(mp->mpd_encoded_val, mp->mpd_encoded_len);
   2738  10016    Thomas 
   2739  10016    Thomas #ifdef	DEBUG
   2740  10016    Thomas 	/*
   2741  10016    Thomas 	 * We should never get here as the layouts
   2742  10016    Thomas 	 * entries should be holding a reference against
   2743  10016    Thomas 	 * this mpd!
   2744  10016    Thomas 	 */
   2745  10016    Thomas 	rw_enter(&instp->mds_mpd_lock, RW_WRITER);
   2746  10016    Thomas 	ASSERT(list_is_empty(&mp->mpd_layouts_list));
   2747  10016    Thomas 	rw_exit(&instp->mds_mpd_lock);
   2748  10016    Thomas #endif
   2749  10016    Thomas 	list_destroy(&mp->mpd_layouts_list);
   2750   6741  th199096 }
   2751   6741  th199096 
   2752   6741  th199096 /*
   2753   6741  th199096  * The OTW device id is 128bits in length, we however are
   2754   6741  th199096  * still using a uint_32 internally.
   2755   6741  th199096  */
   2756   6741  th199096 mds_mpd_t *
   2757  10016    Thomas mds_find_mpd(nfs_server_instance_t *instp, id_t id)
   2758  10016    Thomas {
   2759  10016    Thomas 	mds_mpd_t *mp;
   2760   6741  th199096 	bool_t create = FALSE;
   2761   6741  th199096 
   2762  10016    Thomas 	mp = (mds_mpd_t *)rfs4_dbsearch(instp->mds_mpd_idx,
   2763   6741  th199096 	    (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID);
   2764  10016    Thomas 	return (mp);
   2765  10016    Thomas }
   2766  10016    Thomas 
   2767  10016    Thomas /*
   2768  10016    Thomas  * Plop kernel deviceid into the 128bit OTW deviceid
   2769  10016    Thomas  */
   2770  10016    Thomas void
   2771  10016    Thomas mds_set_deviceid(id_t did, deviceid4 *otw_id)
   2772   6741  th199096 {
   2773   6741  th199096 	ba_devid_t d;
   2774   6741  th199096 
   2775   6741  th199096 	bzero(&d, sizeof (d));
   2776   6741  th199096 	d.i.did = did;
   2777   6741  th199096 	bcopy(&d, otw_id, sizeof (d));
   2778   6741  th199096 }
   2779   6741  th199096 
   2780   6741  th199096 /*
   2781   7739   jwahlig  * Used by the walker to populate the deviceid list.
   2782   6741  th199096  */
   2783   6741  th199096 void
   2784   6741  th199096 mds_mpd_list(rfs4_entry_t entry, void *arg)
   2785   6741  th199096 {
   2786  10016    Thomas 	mds_mpd_t		*mp = (mds_mpd_t *)entry;
   2787  10016    Thomas 	mds_device_list_t	*mdl = (mds_device_list_t *)arg;
   2788   6741  th199096 
   2789   6741  th199096 	deviceid4   *dlip;
   2790   6741  th199096 
   2791   6741  th199096 	/*
   2792   6741  th199096 	 * If this entry is invalid or we should skip it
   2793   6741  th199096 	 * go to the next one..
   2794   6741  th199096 	 */
   2795  10447    Thomas 	if (rfs4_dbe_skip_or_invalid(mp->mpd_dbe))
   2796  10447    Thomas 		return;
   2797  10447    Thomas 
   2798  10447    Thomas 	dlip = &(mdl->mdl_dl[mdl->mdl_count]);
   2799   6741  th199096 
   2800  10016    Thomas 	mds_set_deviceid(mp->mpd_id, dlip);
   2801   6741  th199096 
   2802   6741  th199096 	/*
   2803   6741  th199096 	 * bump to the next devlist_item4
   2804   6741  th199096 	 */
   2805  10447    Thomas 	mdl->mdl_count++;
   2806   6741  th199096 }
   2807   6741  th199096 
   2808  10016    Thomas /* ARGSUSED */
   2809   7811    Thomas ds_addrlist_t *
   2810  10016    Thomas mds_find_ds_addrlist_by_mds_sid(nfs_server_instance_t *instp,
   2811  10016    Thomas     mds_sid *sid)
   2812  10016    Thomas {
   2813  10016    Thomas 	ds_addrlist_t	*dp = NULL;
   2814  10016    Thomas 	ds_guid_info_t	*pgi;
   2815  10016    Thomas 	ds_owner_t	*dop;
   2816  10016    Thomas 	ds_guid_t	guid;
   2817  10016    Thomas 
   2818  10016    Thomas 	/*
   2819  10016    Thomas 	 * Warning, do not, do not ever, free this guid!
   2820  10016    Thomas 	 */
   2821  10016    Thomas 	guid.stor_type = ZFS;
   2822  10016    Thomas 	guid.ds_guid_u.zfsguid.zfsguid_len = sid->len;
   2823  10016    Thomas 	guid.ds_guid_u.zfsguid.zfsguid_val = sid->val;
   2824  10016    Thomas 
   2825  10016    Thomas 	/*
   2826  10016    Thomas 	 * First we need to find the ds_guid_info_t which
   2827  10016    Thomas 	 * corresponds to this mds_sid.
   2828  10016    Thomas 	 */
   2829  10016    Thomas 	pgi = mds_find_ds_guid_info_by_id(&guid);
   2830  10016    Thomas 	if (pgi == NULL)
   2831  10016    Thomas 		return (NULL);
   2832  10016    Thomas 
   2833  10016    Thomas 	dop = pgi->ds_owner;
   2834  10016    Thomas 	if (!dop)
   2835  10016    Thomas 		goto error;
   2836  10016    Thomas 
   2837  10016    Thomas 	/*
   2838  10016    Thomas 	 * XXX: If a ds_owner has multiple addresses, then just grab the first
   2839  10016    Thomas 	 * we find.
   2840  10016    Thomas 	 */
   2841  10016    Thomas 	dp = list_head(&dop->ds_addrlist_list);
   2842  10016    Thomas 	if (dp)
   2843  10016    Thomas 		rfs4_dbe_hold(dp->dbe);
   2844  10016    Thomas 
   2845  10016    Thomas error:
   2846  10016    Thomas 
   2847  10016    Thomas 	rfs4_dbe_rele(pgi->dbe);
   2848   6741  th199096 	return (dp);
   2849   6741  th199096 }
   2850   6741  th199096 
   2851   7811    Thomas ds_addrlist_t *
   2852   7811    Thomas mds_find_ds_addrlist(nfs_server_instance_t *instp, uint32_t id)
   2853   6741  th199096 {
   2854   7811    Thomas 	ds_addrlist_t *dp;
   2855   6741  th199096 	bool_t create = FALSE;
   2856   6741  th199096 
   2857   7811    Thomas 	dp = (ds_addrlist_t *)rfs4_dbsearch(instp->ds_addrlist_idx,
   2858   6741  th199096 	    (void *)(uintptr_t)id, &create, NULL, RFS4_DBS_VALID);
   2859   6741  th199096 	return (dp);
   2860   6741  th199096 }
   2861   6741  th199096 
   2862   9404    Thomas void
   2863   9404    Thomas mds_ds_addrlist_rele(ds_addrlist_t *dp)
   2864   9404    Thomas {
   2865   9404    Thomas 	rfs4_dbe_rele(dp->dbe);
   2866   9404    Thomas }
   2867   6741  th199096 
   /*
    * Hash a NUL-terminated string: for each character the running hash
    * is shifted left by one bit and the character value is added.  An
    * empty string hashes to 0.
    */
   static uint32_t
   mds_str_hash(void *key)
   {
   	const char *cp;
   	uint32_t hash = 0;

   	for (cp = (const char *)key; *cp != '\0'; cp++)
   		hash = (hash << 1) + (uint32_t)*cp;

   	return (hash);
   }
   2884   6741  th199096 
   2885  10016    Thomas static uint32_t
   2886  10016    Thomas mds_utf8string_hash(void *key)
   2887  10016    Thomas {
   2888  10016    Thomas 	utf8string *obj = (utf8string *)key;
   2889  10016    Thomas 	int i;
   2890  10016    Thomas 	uint32_t hash = 0;
   2891  10016    Thomas 
   2892  10016    Thomas 	for (i = 0; i < obj->utf8string_len; i++) {
   2893  10016    Thomas 		hash <<= 1;
   2894  10016    Thomas 		hash += (uint_t)obj->utf8string_val[i];
   2895  10016    Thomas 	}
   2896  10016    Thomas 
   2897  10016    Thomas 	return (hash);
   2898  10016    Thomas }
   2899  10016    Thomas 
   2900  10016    Thomas static bool_t
   2901  10016    Thomas rfs41_invalid_expiry(rfs4_entry_t entry)
   2902   9404    Thomas {
   2903   9404    Thomas 	if (rfs4_dbe_is_invalid(entry->dbe))
   2904   9404    Thomas 		return (TRUE);
   2905   9404    Thomas 
   2906   9404    Thomas 	return (FALSE);
   2907   9404    Thomas }
   2908   6741  th199096 
   /*
    * Hash callback for the ds_addrlist address-key index.
    *
    * The key for this index is a pointer to a uint64_t address key:
    * ds_addrlist_addrkey_mkkey() returns the address of the entry's
    * ds_addr_key and ds_addrlist_addrkey_compare() dereferences the key.
    * The hash must therefore be computed from the value pointed to and
    * not from the pointer itself, otherwise two equal keys stored at
    * different addresses would hash differently and a lookup could never
    * find an existing entry.  The two 32-bit halves of the key are
    * folded together.
    */
   static uint32_t
   ds_addrlist_addrkey_hash(void *key)
   {
   	uint64_t addr_key = *(uint64_t *)key;

   	return ((uint32_t)(addr_key ^ (addr_key >> 32)));
   }
   2914  10016    Thomas 
   2915  10016    Thomas static void *
   2916  10016    Thomas ds_addrlist_addrkey_mkkey(rfs4_entry_t entry)
   2917   6741  th199096 {
   2918   7811    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)entry;
   2919   6741  th199096 
   2920  10016    Thomas 	return (&dp->ds_addr_key);
   2921  10016    Thomas }
   2922  10016    Thomas 
   2923  10016    Thomas /*
   2924  10016    Thomas  * Only compare the address portion and not the
   2925  10016    Thomas  * port info. We do this because the DS may
   2926  10016    Thomas  * have rebooted and gotten a different port
   2927  10016    Thomas  * number.
   2928  10016    Thomas  *
   2929  10016    Thomas  * XXX: What happens if we have multiple DSes
   2930  10016    Thomas  * on one box? I.e., a valid case for the same
   2931  10016    Thomas  * IP, but different ports?
   2932  10016    Thomas  */
   2933   6741  th199096 static int
   2934  10016    Thomas ds_addrlist_addrkey_compare(rfs4_entry_t entry, void *key)
   2935   6741  th199096 {
   2936   7811    Thomas 	ds_addrlist_t *dp = (ds_addrlist_t *)entry;
   2937  10016    Thomas 	uint64_t addr_key = *(uint64_t *)key;
   2938  10016    Thomas 
   2939  10016    Thomas 	return (addr_key == dp->ds_addr_key);
   2940  10016    Thomas }
   2941   6741  th199096 
   /*
    * Data-server information (ds_owner) tables and indexes.
    *
    * Hash callback: the key is an integer value carried in the pointer
    * itself; the hash is that value truncated to 32 bits.
    */
   static uint32_t
   ds_owner_hash(void *key)
   {
   	uintptr_t id = (uintptr_t)key;

   	return ((uint32_t)id);
   }
   2950   6741  th199096 
   2951   6741  th199096 static bool_t
   2952   7739   jwahlig ds_owner_compare(rfs4_entry_t entry, void *key)
   2953   6741  th199096 {
   2954   7739   jwahlig 	ds_owner_t *dop = (ds_owner_t *)entry;
   2955   6741  th199096 
   2956   7739   jwahlig 	return (dop->ds_id == (int)(uintptr_t)key);
   2957   6741  th199096 
   2958   6741  th199096 }
   2959   6741  th199096 
   2960   6741  th199096 static void *
   2961   7739   jwahlig ds_owner_mkkey(rfs4_entry_t entry)
   2962   6741  th199096 {
   2963   7739   jwahlig 	ds_owner_t *dop = (ds_owner_t *)entry;
   2964   6741  th199096 
   2965   7739   jwahlig 	return ((void *)(uintptr_t)dop->ds_id);
   2966   6741  th199096 }
   2967   6741  th199096 
   2968   6741  th199096 static bool_t
   2969   7739   jwahlig ds_owner_inst_compare(rfs4_entry_t entry, void *key)
   2970   6741  th199096 {
   2971   7739   jwahlig 	ds_owner_t *dop = (ds_owner_t *)entry;
   2972   6741  th199096 
   2973   7739   jwahlig 	return (strcmp(dop->identity, key) == 0);
   2974   6741  th199096 }
   2975   6741  th199096 
   2976   6741  th199096 static void *
   2977   7739   jwahlig ds_owner_inst_mkkey(rfs4_entry_t entry)
   2978   6741  th199096 {
   2979   7739   jwahlig 	ds_owner_t *dop = (ds_owner_t *)entry;
   2980   7739   jwahlig 	return (dop->identity);
   2981   6741  th199096 }
   2982   6741  th199096 
   2983   6741  th199096 /*ARGSUSED*/
   2984   6741  th199096 static bool_t