Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/systm.h>
     34 #include <sys/cred.h>
     35 #include <sys/buf.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vnode.h>
     38 #include <sys/uio.h>
     39 #include <sys/errno.h>
     40 #include <sys/sysmacros.h>
     41 #include <sys/statvfs.h>
     42 #include <sys/kmem.h>
     43 #include <sys/dirent.h>
     44 #include <sys/cmn_err.h>
     45 #include <sys/disp.h>
     46 #include <sys/debug.h>
     47 #include <sys/systeminfo.h>
     48 #include <sys/flock.h>
     49 #include <sys/pathname.h>
     50 #include <sys/nbmlock.h>
     51 #include <sys/share.h>
     52 #include <sys/atomic.h>
     53 #include <sys/policy.h>
     54 #include <sys/fem.h>
     55 #include <sys/sdt.h>
     56 #include <sys/ddi.h>
     57 #include <sys/modctl.h>
     58 #include <sys/timod.h>
     59 #include <sys/id_space.h>
     60 
     61 #include <rpc/types.h>
     62 #include <rpc/auth.h>
     63 #include <rpc/rpcsec_gss.h>
     64 #include <rpc/svc.h>
     65 
     66 #include <nfs/nfs.h>
     67 #include <nfs/export.h>
     68 #include <nfs/lm.h>
     69 #include <nfs/nfs4.h>
     70 
     71 #include <sys/strsubr.h>
     72 #include <sys/strsun.h>
     73 
     74 #include <inet/common.h>
     75 #include <inet/ip.h>
     76 #include <inet/ip6.h>
     77 
     78 #include <sys/tsol/label.h>
     79 #include <sys/tsol/tndb.h>
     80 
     81 #include <nfs/nfs4_attrmap.h>
     82 #include <nfs/nfs4_srv_attr.h>
     83 #include <nfs/mds_state.h>
     84 #include <nfs/mds_odl.h>
     85 
     86 #include <nfs/nfs41_filehandle.h>
     87 #include <nfs/ctl_mds_clnt.h>
     88 
     89 #include <nfs/spe_impl.h>
     90 
     91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
     92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
     93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
     94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
     95 
     96 int mds_strict_seqid = 0;
     97 
     98 static void ping_cb_null_thr(mds_session_t *);
     99 
    100 /* End of Tunables */
    101 
    102 /*
    103  * Used to bump the stateid4.seqid value and show changes in the stateid
    104  */
    105 #define	next_stateid(sp) (++(sp)->v41_bits.chgseq)
    106 
    107 /*
    108  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
    109  *	This is used to return NFS4ERR_TOOSMALL when clients specify
    110  *	maxcount that isn't large enough to hold the smallest possible
    111  *	XDR encoded dirent.
    112  *
    113  *	    sizeof cookie (8 bytes) +
    114  *	    sizeof name_len (4 bytes) +
    115  *	    sizeof smallest (padded) name (4 bytes) +
    116  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
    117  *	    sizeof attrlist4_len (4 bytes) +
    118  *	    sizeof next boolean (4 bytes)
    119  *
    120  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
    121  * the smallest possible entry4 (assumes no attrs requested).
    122  *	sizeof nfsstat4 (4 bytes) +
    123  *	sizeof verifier4 (8 bytes) +
    124  *	sizeof entry4list bool (4 bytes) +
    125  *	sizeof entry4 	(36 bytes) +
    126  *	sizeof eof bool  (4 bytes)
    127  *
    128  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
    129  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
    130  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
    131  *	required for a given name length.  MAXNAMELEN is the maximum
    132  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
    133  *	macros are to allow for . and .. entries -- just a minor tweak to try
    134  *	and guarantee that buffer we give to VOP_READDIR will be large enough
    135  *	to hold ., .., and the largest possible solaris dirent64.
    136  */
    137 #define	RFS4_MINLEN_ENTRY4 36
    138 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
    139 #define	RFS4_MINLEN_RDDIR_BUF \
    140 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
    141 
    142 /*
    143  * It would be better to pad to 4 bytes since that's what XDR would do,
    144  * but the dirents UFS gives us are already padded to 8, so just take
    145  * what we're given.  Dircount is only a hint anyway.  Currently the
    146  * solaris kernel is ASCII only, so there's no point in calling the
    147  * UTF8 functions.
    148  *
    149  * dirent64: named padded to provide 8 byte struct alignment
    150  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
    151  *
    152  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
    153  *
    154  */
    155 #define	DIRENT64_TO_DIRCOUNT(dp) \
    156 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
    157 
    158 /*
    159  * types of label comparison
    160  */
    161 #define	EQUALITY_CHECK	0
    162 #define	DOMINANCE_CHECK	1
    163 
    164 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
    165 
    166 void		rfs4_init_compound_state(struct compound_state *);
    167 
    168 static void	nullfree(nfs_resop4 *, compound_state_t *);
    169 static void	mds_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    170 			compound_state_t *);
    171 static void	mds_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    172 			compound_state_t *);
    173 static void	mds_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    174 			compound_state_t *);
    175 static void	mds_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    176 			compound_state_t *);
    177 static void	mds_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    178 			compound_state_t *);
    179 static void	mds_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    180 			compound_state_t *);
    181 static void	mds_op_create_free(nfs_resop4 *resop);
    182 static void	mds_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    183 				struct svc_req *, compound_state_t *);
    184 static void	mds_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    185 			compound_state_t *);
    186 static void	mds_op_getattr_free(nfs_resop4 *, compound_state_t *);
    187 static void	mds_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    188 			compound_state_t *);
    189 static void	mds_op_getfh_free(nfs_resop4 *, compound_state_t *);
    190 static void	mds_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    191 			compound_state_t *);
    192 static void	mds_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    193 			compound_state_t *);
    194 static void	mds_lock_denied_free(nfs_resop4 *, compound_state_t *);
    195 static void	mds_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    196 			compound_state_t *);
    197 static void	mds_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    198 			compound_state_t *);
    199 static void	mds_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    200 			compound_state_t *);
    201 static void	mds_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    202 			compound_state_t *);
    203 static void	mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    204 				struct svc_req *req, compound_state_t *);
    205 static void	mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    206 			compound_state_t *);
    207 static void	mds_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    208 			compound_state_t *);
    209 static void	mds_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    210 			struct svc_req *, compound_state_t *);
    211 static void	mds_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    212 			compound_state_t *);
    213 static void	mds_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    214 			compound_state_t *);
    215 static void	mds_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    216 			compound_state_t *);
    217 static void	mds_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    218 			compound_state_t *);
    219 static void	mds_op_read_free(nfs_resop4 *, compound_state_t *);
    220 void		mds_op_readdir(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    221 			compound_state_t *);
    222 static void	mds_op_readdir_free(nfs_resop4 *, compound_state_t *);
    223 static void	mds_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    224 			compound_state_t *);
    225 static void	mds_op_readlink_free(nfs_resop4 *, compound_state_t *);
    226 static void	mds_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    227 			struct svc_req *, compound_state_t *);
    228 static void	mds_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    229 			compound_state_t *);
    230 static void	mds_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    231 			compound_state_t *);
    232 static void	mds_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    233 			compound_state_t *);
    234 static void	mds_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    235 			compound_state_t *);
    236 static void	mds_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    237 			compound_state_t *);
    238 static void	mds_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    239 			compound_state_t *);
    240 static void	mds_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    241 			compound_state_t *);
    242 static void	mds_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    243 			compound_state_t *);
    244 static void	mds_op_exchange_id(nfs_argop4 *, nfs_resop4 *,
    245 			struct svc_req *, compound_state_t *);
    246 static void	mds_op_exid_free(nfs_resop4 *, compound_state_t *);
    247 static void	mds_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    248 			compound_state_t *);
    249 static void	mds_op_secinfonn(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    250 			compound_state_t *);
    251 nfsstat4	do_rfs4_op_secinfo(struct compound_state *, char *, int,
    252     SECINFO4res *);
    253 
    254 static void	mds_op_secinfo_free(nfs_resop4 *, compound_state_t *);
    255 
    256 static void	mds_op_backchannel_ctl(nfs_argop4 *, nfs_resop4 *,
    257 			struct svc_req *, compound_state_t *);
    258 static void	mds_op_bind_conn_to_session(nfs_argop4 *, nfs_resop4 *,
    259 			struct svc_req *, compound_state_t *);
    260 static void	mds_op_create_clientid(nfs_argop4 *, nfs_resop4 *,
    261 			struct svc_req *, compound_state_t *);
    262 static void	mds_op_create_session(nfs_argop4 *, nfs_resop4 *,
    263 			struct svc_req *, compound_state_t *);
    264 static void	mds_op_destroy_session(nfs_argop4 *, nfs_resop4 *,
    265 			struct svc_req *, compound_state_t *);
    266 static void	mds_op_sequence(nfs_argop4 *, nfs_resop4 *,
    267 			struct svc_req *, compound_state_t *);
    268 
    269 static void mds_op_get_devlist(nfs_argop4 *, nfs_resop4 *,
    270 		struct svc_req *, compound_state_t *);
    271 
    272 static void mds_op_get_devinfo(nfs_argop4 *, nfs_resop4 *,
    273 		struct svc_req *, compound_state_t *);
    274 
    275 static void mds_op_layout_get(nfs_argop4 *, nfs_resop4 *,
    276 		struct svc_req *, compound_state_t *);
    277 static void mds_op_layout_get_free(nfs_resop4 *, compound_state_t *);
    278 
    279 static void mds_op_layout_commit(nfs_argop4 *, nfs_resop4 *,
    280 		struct svc_req *, compound_state_t *);
    281 
    282 static void mds_op_layout_return(nfs_argop4 *, nfs_resop4 *,
    283 		struct svc_req *, compound_state_t *);
    284 
    285 static void mds_op_reclaim_complete(nfs_argop4 *, nfs_resop4 *,
    286     struct svc_req *, compound_state_t *);
    287 
    288 static int	seq_chk_limits(nfs_argop4 *, nfs_resop4 *, compound_state_t *);
    289 
    290 nfsstat4 check_open_access(uint32_t,
    291 			struct compound_state *, struct svc_req *);
    292 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
    293 
    294 static void	mds_free_reply(nfs_resop4 *, compound_state_t *);
    295 
    296 vnode_t *do_rfs4_op_mknod(CREATE4args *, CREATE4res *, struct svc_req *,
    297 			struct compound_state *, vattr_t *, char *);
    298 
    299 nfsstat4 rfs4_do_lock(rfs4_lo_state_t *, nfs_lock_type4, seqid4,
    300 		offset4, length4, cred_t *, nfs_resop4 *);
    301 
    302 rfs4_lo_state_t *mds_findlo_state_by_owner(rfs4_lockowner_t *,
    303 	    rfs4_state_t *, bool_t *);
    304 
    305 bool_t in_flavor_list(int, int *, int);
    306 
    307 nfsstat4 attrmap4_to_vattrmask(attrmap4 *, struct nfs4_svgetit_arg *);
    308 
    309 nfsstat4 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *);
    310 
    311 nfsstat4 do_rfs4_op_getattr(attrmap4 *, fattr4 *, struct nfs4_svgetit_arg *);
    312 
    313 nfsstat4 do_rfs4_op_lookup(char *, uint_t, struct svc_req *,
    314 		struct compound_state *);
    315 
    316 rfs4_lockowner_t *mds_findlockowner_by_pid(nfs_server_instance_t *, pid_t);
    317 
    318 mds_session_t *mds_findsession_by_id(nfs_server_instance_t *, sessionid4);
    319 
    320 rfs4_openowner_t *mds_findopenowner(nfs_server_instance_t *, open_owner4 *,
    321     bool_t *);
    322 
    323 static void	mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    324 			compound_state_t *);
    325 
    326 extern mds_mpd_t *mds_find_mpd(nfs_server_instance_t *, id_t);
    327 extern void rfs41_lo_seqid(stateid_t *);
    328 extern void mds_delete_layout(vnode_t *);
    329 extern void mds_clean_grants_by_fsid(rfs4_client_t *, vnode_t *);
    330 extern mds_layout_t *mds_add_layout(layout_core_t *lc);
    331 
    332 nfsstat4
    333 create_vnode(vnode_t *, char *,  vattr_t *, createmode4, timespec32_t *,
    334     cred_t *, vnode_t **, bool_t *);
    335 
    336 
    337 /* HACKERY */
    338 nfsstat4 rfs4_get_all_state(struct compound_state *, stateid4 *,
    339     rfs4_state_t **, rfs4_deleg_state_t **, rfs4_lo_state_t **);
    340 
    341 void rfs4_ss_clid(struct compound_state *, rfs4_client_t *, struct svc_req *);
    342 void rfs4_ss_chkclid(struct compound_state *, rfs4_client_t *);
    343 
    344 int layout_match(stateid_t, stateid4, nfsstat4 *);
    345 
    346 extern stateid4 special0;
    347 extern stateid4 special1;
    348 
    349 #define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
    350 			stateid4_cmp(id, &special1))
    351 
    352 void rfs4_cn_release(compound_state_t *);
    353 
    354 mds_layout_grant_t *rfs41_findlogrant(struct compound_state *,
    355     rfs4_file_t *, rfs4_client_t *, bool_t *);
    356 void rfs41_lo_grant_rele(mds_layout_grant_t *);
    357 mds_ever_grant_t *rfs41_findevergrant(rfs4_client_t *, vnode_t *, bool_t *);
    358 void rfs41_ever_grant_rele(mds_ever_grant_t *);
    359 
    360 static uint32_t compute_use_pnfs_flags(uint32_t);
    361 
    362 /* ARGSUSED */
    363 static void
    364 mds_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    365 	compound_state_t *cs)
    366 {
    367 	DTRACE_NFSV4_1(op__notsup__start,
    368 	    strcut compound_state *, cs);
    369 
    370 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
    371 
    372 	DTRACE_NFSV4_1(op__notsup__done,
    373 	    struct compound_state *, cs);
    374 }
    375 
    376 /* ARGSUSED */
    377 static void
    378 mds_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    379 	compound_state_t *cs)
    380 {
    381 	DTRACE_NFSV4_1(op__illegal__start,
    382 	    struct compound_state *, cs);
    383 
    384 	*cs->statusp =
    385 	    *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_OP_ILLEGAL;
    386 
    387 	DTRACE_NFSV4_1(op__illegal__done,
    388 	    struct compound_state *, cs);
    389 }
    390 
    391 /* ARGSUSED */
    392 static void
    393 mds_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    394 	compound_state_t *cs)
    395 {
    396 	DTRACE_NFSV4_1(op__inval__start,
    397 	    struct compound_state *, cs);
    398 
    399 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
    400 
    401 	DTRACE_NFSV4_1(op__inval__done,
    402 	    struct compound_state *, cs);
    403 }
    404 
    405 /*ARGSUSED*/
    406 static void
    407 nullfree(nfs_resop4 *resop, compound_state_t *cs)
    408 {
    409 }
    410 
    411 static op_disp_tbl_t mds_disptab[] = {
    412 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 0"},
    413 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 1"},
    414 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 2"},
    415 	{mds_op_access, nullfree, DISP_OP_MDS, "ACCESS"},
    416 	{mds_op_close, nullfree, DISP_OP_MDS, "CLOSE"},
    417 	{mds_op_commit, nullfree, DISP_OP_BOTH, "COMMIT"},
    418 	{mds_op_create, nullfree, DISP_OP_MDS, "CREATE"},
    419 	{mds_op_inval, nullfree, DISP_OP_BAD, "BAD Op 7"},
    420 	{mds_op_delegreturn, nullfree, DISP_OP_MDS, "DELEGRETURN"},
    421 	{mds_op_getattr, mds_op_getattr_free, DISP_OP_MDS, "GETATTR"},
    422 	{mds_op_getfh, mds_op_getfh_free, DISP_OP_MDS, "GETFH"},
    423 	{mds_op_link, nullfree, DISP_OP_MDS, "LINK"},
    424 	{mds_op_lock, mds_lock_denied_free, DISP_OP_MDS, "LOCK"},
    425 	{mds_op_lockt, mds_lock_denied_free,  DISP_OP_MDS, "LOCKT"},
    426 	{mds_op_locku, nullfree,  DISP_OP_MDS, "LOCKU"},
    427 	{mds_op_lookup, nullfree,  DISP_OP_MDS, "LOOKUP"},
    428 	{mds_op_lookupp, nullfree,  DISP_OP_MDS, "LOOKUPP"},
    429 	{mds_op_nverify, nullfree,  DISP_OP_MDS, "NVERIFY"},
    430 	{mds_op_open, mds_free_reply,  DISP_OP_MDS, "OPEN"},
    431 	{mds_op_openattr, nullfree,  DISP_OP_MDS, "OPENATTR"},
    432 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 20"},
    433 	{mds_op_open_downgrade, nullfree,  DISP_OP_MDS, "OPEN_DOWNGRADE"},
    434 	{mds_op_putfh, nullfree, DISP_OP_BOTH, "PUTFH"},
    435 	{mds_op_putpubfh, nullfree,  DISP_OP_MDS, "PUTPUBFH"},
    436 	{mds_op_putrootfh, nullfree,  DISP_OP_MDS, "PUTROOTFH"},
    437 	{mds_op_read, mds_op_read_free, DISP_OP_BOTH, "READ"},
    438 	{mds_op_readdir, mds_op_readdir_free,  DISP_OP_MDS, "READDIR"},
    439 	{mds_op_readlink, mds_op_readlink_free,  DISP_OP_MDS, "READLINK"},
    440 	{mds_op_remove, nullfree,  DISP_OP_MDS, "REMOVE"},
    441 	{mds_op_rename, nullfree,  DISP_OP_MDS, "RENAME"},
    442 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 30"},
    443 	{mds_op_restorefh, nullfree,  DISP_OP_MDS, "RESTOREFH"},
    444 	{mds_op_savefh, nullfree,  DISP_OP_MDS, "SAVEFH"},
    445 	{mds_op_secinfo, mds_op_secinfo_free,  DISP_OP_MDS, "SECINFO"},
    446 	{mds_op_setattr, nullfree,  DISP_OP_MDS, "SETATTR"},
    447 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 35"},
    448 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 36"},
    449 	{mds_op_verify, nullfree,  DISP_OP_MDS, "VERIFY"},
    450 	{mds_op_write, nullfree, DISP_OP_BOTH, "WRITE"},
    451 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 39"},
    452 	{mds_op_backchannel_ctl, nullfree,  DISP_OP_BOTH, "BACKCHANNEL_CTL"},
    453 	{mds_op_bind_conn_to_session, nullfree,
    454 	    DISP_OP_BOTH, "BIND_CONN_TO_SESS"},
    455 	{mds_op_exchange_id, mds_op_exid_free,  DISP_OP_BOTH, "EXCHANGE_ID"},
    456 	{mds_op_create_session, nullfree,  DISP_OP_BOTH, "CREATE_SESS"},
    457 	{mds_op_destroy_session, nullfree,  DISP_OP_BOTH, "DESTROY_SESS"},
    458 	{mds_op_illegal, nullfree,  DISP_OP_MDS, "FREE_STATEID"},
    459 	{mds_op_illegal, nullfree,  DISP_OP_MDS, "GET_DIR_DELEG"},
    460 	{mds_op_get_devinfo, nullfree,  DISP_OP_MDS, "GET_DEVINFO"},
    461 	{mds_op_get_devlist, nullfree,  DISP_OP_MDS, "GET_DEVLIST"},
    462 	{mds_op_layout_commit, nullfree,  DISP_OP_MDS, "LAYOUT_COMMIT"},
    463 	{mds_op_layout_get, mds_op_layout_get_free,  DISP_OP_MDS, "LAYOUT_GET"},
    464 	{mds_op_layout_return, nullfree,  DISP_OP_MDS, "LAYOUT_RETURN"},
    465 	{mds_op_secinfonn, nullfree,
    466 	    DISP_OP_BOTH, "SECINFO_NONAME"},
    467 	{mds_op_sequence, nullfree,  DISP_OP_BOTH, "SEQUENCE"},
    468 	{mds_op_notsup, nullfree,  DISP_OP_BOTH, "SET_SSV"},
    469 	{mds_op_notsup, nullfree,  DISP_OP_MDS, "TEST_STATEID"},
    470 	{mds_op_notsup, nullfree,  DISP_OP_MDS, "WANT_DELEG"},
    471 	{mds_op_notsup, nullfree,  DISP_OP_BOTH, "DESTROY_CLIENTID"},
    472 	{mds_op_reclaim_complete, nullfree,  DISP_OP_MDS, "RECLAIM_COMPLETE"}
    473 };
    474 
    475 static uint_t mds_disp_cnt = sizeof (mds_disptab) / sizeof (mds_disptab[0]);
    476 
    477 #define	OP_ILLEGAL_IDX (mds_disp_cnt)
    478 
    479 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
    480 
    481 #ifdef	nextdp
    482 #undef nextdp
    483 #endif
    484 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
    485 
    486 /*ARGSUSED*/
    487 static void
    488 mds_op_readdir_free(nfs_resop4 *resop, compound_state_t *cs)
    489 {
    490 	/* Common function used for NFSv4.0 and NFSv4.1 */
    491 	rfs4_op_readdir_free(resop);
    492 }
    493 
    494 /*ARGSUSED*/
    495 static void
    496 mds_op_secinfo_free(nfs_resop4 *resop, compound_state_t *cs)
    497 {
    498 	/* Common function used for NFSv4.0 and NFSv4.1 */
    499 	rfs4_op_secinfo_free(resop);
    500 }
    501 
    502 /*
    503  */
    504 void
    505 mds_srvrfini(void)
    506 {
    507 	/* some shutdown stuff for the minor verson 1 server */
    508 }
    509 
    510 nfsstat4	rfs4_state_has_access(rfs4_state_t *, int, vnode_t *);
    511 int		rfs4_verify_attr(struct nfs4_svgetit_arg *, attrmap4 *,
    512 		    struct nfs4_ntov_table *);
    513 
    514 
    515 /*
    516  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
    517  * the file is being truncated, return NFS4_OK if allowed or approriate
    518  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
    519  * the associated file will be done if the I/O is not consistent with any
    520  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
    521  * as reader or writer as appropriate. rfs4_op_open will accquire the
    522  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
    523  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
    524  * deleg parameter, we will return whether a write delegation is held by
    525  * the client associated with this stateid.
    526  * If the server instance associated with the relevant client is in its
    527  * grace period, return NFS4ERR_GRACE.
    528  */
    529 nfsstat4
    530 mds_validate_stateid(int mode, struct compound_state *cs, vnode_t *vp,
    531     stateid4 *stateid, bool_t trunc, bool_t *deleg, bool_t do_access)
    532 {
    533 	rfs4_file_t *fp;
    534 	bool_t create = FALSE;
    535 	rfs4_state_t *sp;
    536 	rfs4_deleg_state_t *dsp;
    537 	rfs4_lo_state_t *lsp;
    538 	stateid_t *id = (stateid_t *)stateid;
    539 	nfsstat4 stat = NFS4_OK;
    540 
    541 	if (ISSPECIAL(stateid)) {
    542 		fp = rfs4_findfile(cs->instp, vp, NULL, &create);
    543 		if (fp == NULL)
    544 			return (NFS4_OK);
    545 		if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) {
    546 			rfs4_file_rele(fp);
    547 			return (NFS4_OK);
    548 		}
    549 		if (mode == FWRITE ||
    550 		    fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) {
    551 			rfs4_recall_deleg(fp, trunc, NULL);
    552 			rfs4_file_rele(fp);
    553 			return (NFS4ERR_DELAY);
    554 		}
    555 		rfs4_file_rele(fp);
    556 		return (NFS4_OK);
    557 	}
    558 
    559 	stat = rfs4_get_all_state(cs, stateid, &sp, &dsp, &lsp);
    560 	if (stat != NFS4_OK)
    561 		return (stat);
    562 
    563 	/*
    564 	 * Ordering of the following 'if' statements is specific
    565 	 * since rfs4_get_all_state() may return a value for sp and
    566 	 * lsp. First we check lsp, then 'fall' through to sp.
    567 	 */
    568 	if (lsp != NULL) {
    569 		/* Is associated server instance in its grace period? */
    570 		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
    571 			rfs4_lo_state_rele(lsp, FALSE);
    572 			if (sp != NULL)
    573 				rfs4_dbe_rele(sp->rs_dbe);
    574 			return (NFS4ERR_GRACE);
    575 		}
    576 
    577 		if (lsp->rls_lockid.v41_bits.chgseq != 0) {
    578 			/* Seqid in the future? - that's bad */
    579 			if (lsp->rls_lockid.v41_bits.chgseq <
    580 			    id->v41_bits.chgseq) {
    581 				rfs4_lo_state_rele(lsp, FALSE);
    582 				if (sp != NULL)
    583 					rfs4_dbe_rele(sp->rs_dbe);
    584 				return (NFS4ERR_BAD_STATEID);
    585 			}
    586 			/* Seqid in the past? - that's old */
    587 			if (lsp->rls_lockid.v41_bits.chgseq >
    588 			    id->v41_bits.chgseq) {
    589 				rfs4_lo_state_rele(lsp, FALSE);
    590 				if (sp != NULL)
    591 					rfs4_dbe_rele(sp->rs_dbe);
    592 				return (NFS4ERR_OLD_STATEID);
    593 			}
    594 		}
    595 
    596 		/* Ensure specified filehandle matches */
    597 		if (lsp->rls_state->rs_finfo->rf_vp != vp) {
    598 			rfs4_lo_state_rele(lsp, FALSE);
    599 			if (sp != NULL)
    600 				rfs4_dbe_rele(sp->rs_dbe);
    601 			return (NFS4ERR_BAD_STATEID);
    602 		}
    603 		rfs4_lo_state_rele(lsp, FALSE);
    604 	}
    605 
    606 	/*
    607 	 * Stateid provided was an "open" or via the lock stateid
    608 	 */
    609 	if (sp != NULL) {
    610 		/*
    611 		 * only check if the passed in stateid was an OPENID,
    612 		 * ie. Skip if we got here via the LOCKID.
    613 		 */
    614 		if (id->v41_bits.type == OPENID) {
    615 			/* Is associated server instance in its grace period? */
    616 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
    617 				rfs4_dbe_rele(sp->rs_dbe);
    618 				return (NFS4ERR_GRACE);
    619 			}
    620 
    621 			if (sp->rs_stateid.v41_bits.chgseq != 0) {
    622 				/* Seqid in the future? - that's bad */
    623 				if (sp->rs_stateid.v41_bits.chgseq <
    624 				    id->v41_bits.chgseq) {
    625 					rfs4_dbe_rele(sp->rs_dbe);
    626 					return (NFS4ERR_BAD_STATEID);
    627 				}
    628 				/* Seqid in the past - that's old */
    629 				if (sp->rs_stateid.v41_bits.chgseq >
    630 				    id->v41_bits.chgseq) {
    631 					rfs4_dbe_rele(sp->rs_dbe);
    632 					return (NFS4ERR_OLD_STATEID);
    633 				}
    634 			}
    635 
    636 			/* Ensure specified filehandle matches */
    637 			if (sp->rs_finfo->rf_vp != vp) {
    638 				rfs4_dbe_rele(sp->rs_dbe);
    639 				return (NFS4ERR_BAD_STATEID);
    640 			}
    641 		}
    642 		if (sp->rs_owner->ro_need_confirm) {
    643 			rfs4_dbe_rele(sp->rs_dbe);
    644 			return (NFS4ERR_BAD_STATEID);
    645 		}
    646 
    647 		if (sp->rs_closed == TRUE) {
    648 			rfs4_dbe_rele(sp->rs_dbe);
    649 			return (NFS4ERR_OLD_STATEID);
    650 		}
    651 
    652 		if (do_access)
    653 			stat = rfs4_state_has_access(sp, mode, vp);
    654 		else
    655 			stat = NFS4_OK;
    656 
    657 		/*
    658 		 * Return whether this state has write
    659 		 * delegation if desired
    660 		 */
    661 		if (deleg &&
    662 		    (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
    663 			*deleg = TRUE;
    664 
    665 		/*
    666 		 * We got a valid stateid, so we update the
    667 		 * lease on the client. Ideally we would like
    668 		 * to do this after the calling op succeeds,
    669 		 * but for now this will be good
    670 		 * enough. Callers of this routine are
    671 		 * currently insulated from the state stuff.
    672 		 */
    673 		rfs4_update_lease(sp->rs_owner->ro_client);
    674 
    675 		/*
    676 		 * If a delegation is present on this file and
    677 		 * this is a WRITE, then update the lastwrite
    678 		 * time to indicate that activity is present.
    679 		 */
    680 		if (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
    681 		    mode == FWRITE) {
    682 			sp->rs_finfo->rf_dinfo->rd_time_lastwrite =
    683 			    gethrestime_sec();
    684 		}
    685 
    686 		rfs4_dbe_rele(sp->rs_dbe);
    687 		return (stat);
    688 	}
    689 
    690 	if (dsp != NULL) {
    691 		/* Is associated server instance in its grace period? */
    692 		if (rfs4_clnt_in_grace(dsp->rds_client)) {
    693 			rfs4_deleg_state_rele(dsp);
    694 			return (NFS4ERR_GRACE);
    695 		}
    696 
    697 		if ((dsp->rds_delegid.v41_bits.chgseq != 0) &&
    698 		    (dsp->rds_delegid.v41_bits.chgseq != id->v41_bits.chgseq)) {
    699 			rfs4_deleg_state_rele(dsp);
    700 			return (NFS4ERR_BAD_STATEID);
    701 		}
    702 
    703 		/* Ensure specified filehandle matches */
    704 		if (dsp->rds_finfo->rf_vp != vp) {
    705 			rfs4_deleg_state_rele(dsp);
    706 			return (NFS4ERR_BAD_STATEID);
    707 		}
    708 		/*
    709 		 * Return whether this state has write
    710 		 * delegation if desired
    711 		 */
    712 		if (deleg &&
    713 		    (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
    714 			*deleg = TRUE;
    715 
    716 		rfs4_update_lease(dsp->rds_client);
    717 
    718 		/*
    719 		 * If a delegation is present on this file and
    720 		 * this is a WRITE, then update the lastwrite
    721 		 * time to indicate that activity is present.
    722 		 */
    723 		if (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
    724 		    mode == FWRITE) {
    725 			dsp->rds_finfo->rf_dinfo->rd_time_lastwrite =
    726 			    gethrestime_sec();
    727 		}
    728 
    729 		/*
    730 		 * XXX - what happens if this is a WRITE and the
    731 		 * delegation type of for READ.
    732 		 */
    733 		rfs4_deleg_state_rele(dsp);
    734 
    735 		return (stat);
    736 	}
    737 	/*
    738 	 * If we got this far, something bad happened
    739 	 */
    740 	return (NFS4ERR_BAD_STATEID);
    741 }
    742 
    743 nfsstat4
    744 mds_setattr(attrmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    745     stateid4 *stateid)
    746 {
    747 	int error = 0;
    748 	struct nfs4_svgetit_arg sarg;
    749 	bool_t trunc;
    750 
    751 	nfsstat4 status = NFS4_OK;
    752 	cred_t *cr = cs->cr;
    753 	vnode_t *vp = cs->vp;
    754 	struct nfs4_ntov_table ntov;
    755 	struct statvfs64 sb;
    756 	struct vattr bva;
    757 	struct flock64 bf;
    758 	int in_crit = 0;
    759 	uint_t saved_mask = 0;
    760 	caller_context_t ct;
    761 	attrvers_t avers;
    762 	struct nfs4_ntov_map *nvmap;
    763 
    764 	avers = RFS4_ATTRVERS(cs);
    765 	nvmap = NFS4_NTOV_MAP(avers);
    766 	*resp = NFS4_EMPTY_ATTRMAP(avers);
    767 	sarg.sbp = &sb;
    768 	nfs4_ntov_table_init(&ntov, avers);
    769 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
    770 	    NFS4ATTR_SETIT);
    771 	if (status != NFS4_OK) {
    772 		/*
    773 		 * failed set attrs
    774 		 */
    775 		goto done;
    776 	}
    777 
    778 	if (sarg.vap->va_mask == 0 && ! ATTR_ISSET(fattrp->attrmask, ACL) &&
    779 	    ! ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
    780 		/*
    781 		 * no further work to be done
    782 		 */
    783 		goto done;
    784 	}
    785 
    786 	ct.cc_sysid = 0;
    787 	ct.cc_pid = 0;
    788 	ct.cc_caller_id = cs->instp->caller_id;
    789 	ct.cc_flags = CC_DONTBLOCK;
    790 
    791 	/*
    792 	 * If we got a request to set the ACL and the MODE, only
    793 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
    794 	 * to change any other bits, along with setting an ACL,
    795 	 * gives NFS4ERR_INVAL.
    796 	 */
    797 	if (ATTR_ISSET(fattrp->attrmask, ACL) &&
    798 	    ATTR_ISSET(fattrp->attrmask, MODE)) {
    799 		vattr_t va;
    800 
    801 		va.va_mask = AT_MODE;
    802 		error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
    803 		if (error) {
    804 			status = puterrno4(error);
    805 			goto done;
    806 		}
    807 		if ((sarg.vap->va_mode ^ va.va_mode) &
    808 		    ~(VSUID | VSGID | VSVTX)) {
    809 			status = NFS4ERR_INVAL;
    810 			goto done;
    811 		}
    812 	}
    813 
    814 	/* Check stateid only if size has been set */
    815 	if (sarg.vap->va_mask & AT_SIZE) {
    816 		trunc = (sarg.vap->va_size == 0);
    817 		status = mds_validate_stateid(FWRITE,
    818 		    cs, cs->vp, stateid, trunc,
    819 		    &cs->deleg, sarg.vap->va_mask & AT_SIZE);
    820 		if (status != NFS4_OK)
    821 			goto done;
    822 	}
    823 
    824 	/* XXX start of possible race with delegations */
    825 
    826 	/*
    827 	 * We need to specially handle size changes because it is
    828 	 * possible for the client to create a file with read-only
    829 	 * modes, but with the file opened for writing. If the client
    830 	 * then tries to set the file size, e.g. ftruncate(3C),
    831 	 * fcntl(F_FREESP), the normal access checking done in
    832 	 * VOP_SETATTR would prevent the client from doing it even though
    833 	 * it should be allowed to do so.  To get around this, we do the
    834 	 * access checking for ourselves and use VOP_SPACE which doesn't
    835 	 * do the access checking.
    836 	 * Also the client should not be allowed to change the file
    837 	 * size if there is a conflicting non-blocking mandatory lock in
    838 	 * the region of the change.
    839 	 */
    840 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
    841 		u_offset_t offset;
    842 		ssize_t length;
    843 
    844 		/*
    845 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
    846 		 * before returning, sarg.vap->va_mask is used to
    847 		 * generate the setattr reply bitmap.  We also clear
    848 		 * AT_SIZE below before calling VOP_SPACE.  For both
    849 		 * of these cases, the va_mask needs to be saved here
    850 		 * and restored after calling VOP_SETATTR.
    851 		 */
    852 		saved_mask = sarg.vap->va_mask;
    853 
    854 		/*
    855 		 * Check any possible conflict due to NBMAND locks.
    856 		 * Get into critical region before VOP_GETATTR, so the
    857 		 * size attribute is valid when checking conflicts.
    858 		 */
    859 		if (nbl_need_check(vp)) {
    860 			nbl_start_crit(vp, RW_READER);
    861 			in_crit = 1;
    862 		}
    863 
    864 		bva.va_mask = AT_UID|AT_SIZE;
    865 		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
    866 			status = puterrno4(error);
    867 			goto done;
    868 		}
    869 
    870 		if (in_crit) {
    871 			if (sarg.vap->va_size < bva.va_size) {
    872 				offset = sarg.vap->va_size;
    873 				length = bva.va_size - sarg.vap->va_size;
    874 			} else {
    875 				offset = bva.va_size;
    876 				length = sarg.vap->va_size - bva.va_size;
    877 			}
    878 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
    879 			    &ct)) {
    880 				status = NFS4ERR_LOCKED;
    881 				goto done;
    882 			}
    883 		}
    884 
    885 		if (crgetuid(cr) == bva.va_uid) {
    886 			sarg.vap->va_mask &= ~AT_SIZE;
    887 			bf.l_type = F_WRLCK;
    888 			bf.l_whence = 0;
    889 			bf.l_start = (off64_t)sarg.vap->va_size;
    890 			bf.l_len = 0;
    891 			bf.l_sysid = 0;
    892 			bf.l_pid = 0;
    893 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
    894 			    (offset_t)sarg.vap->va_size, cr, &ct);
    895 		}
    896 	}
    897 
    898 	if (!error && sarg.vap->va_mask != 0)
    899 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
    900 
    901 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
    902 	if (saved_mask & AT_SIZE)
    903 		sarg.vap->va_mask |= AT_SIZE;
    904 
    905 	/*
    906 	 * If an ACL was being set, it has been delayed until now,
    907 	 * in order to set the mode (via the VOP_SETATTR() above) first.
    908 	 */
    909 	if (! error && ATTR_ISSET(fattrp->attrmask, ACL)) {
    910 		int i;
    911 
    912 		for (i = 0; i < ntov.attrcnt; i++)
    913 			if (ntov.amap[i] == FATTR4_ACL)
    914 				break;
    915 		if (i < ntov.attrcnt) {
    916 			error = (*nvmap[FATTR4_ACL].sv_getit)(NFS4ATTR_SETIT,
    917 			    &sarg, &ntov.na[i]);
    918 			if (error == 0) {
    919 				ATTR_SET(*resp, ACL);
    920 			} else if (error == ENOTSUP) {
    921 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
    922 				status = NFS4ERR_ATTRNOTSUPP;
    923 				goto done;
    924 			}
    925 		} else {
    926 			error = EINVAL;
    927 		}
    928 	}
    929 
    930 	if (! error && ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
    931 		/*
    932 		 * Store layout hint.  Layout hint will be stored
    933 		 * in file struct (which means it can only be set
    934 		 * when the file is open).  If layout hint is allowed
    935 		 * for files not open, then it must be stored
    936 		 * persistently.
    937 		 *
    938 		 * status assignment placates lint.  it will
    939 		 * be replaced with code to store the layout
    940 		 * hint.
    941 		 */
    942 		status = NFS4_OK;
    943 	}
    944 
    945 	if (error) {
    946 		/* check if a monitor detected a delegation conflict */
    947 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
    948 			status = NFS4ERR_DELAY;
    949 		else
    950 			status = puterrno4(error);
    951 
    952 		/*
    953 		 * Set the response bitmap when setattr failed.
    954 		 * If VOP_SETATTR partially succeeded, test by doing a
    955 		 * VOP_GETATTR on the object and comparing the data
    956 		 * to the setattr arguments.
    957 		 */
    958 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
    959 	} else {
    960 		/*
    961 		 * Force modified metadata out to stable storage.
    962 		 */
    963 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
    964 		/*
    965 		 * Set response bitmap
    966 		 */
    967 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
    968 	}
    969 
    970 	/* Return early and already have a NFSv4 error */
    971 done:
    972 	/*
    973 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
    974 	 * conversion sets both readable and writeable NFS4 attrs
    975 	 * for AT_MTIME and AT_ATIME.  The line below masks out
    976 	 * unrequested attrs from the setattr result bitmap.  This
    977 	 * is placed after the done: label to catch the ATTRNOTSUP
    978 	 * case.
    979 	 */
    980 	ATTRMAP_MASK(*resp, fattrp->attrmask);
    981 
    982 	if (in_crit)
    983 		nbl_end_crit(vp);
    984 
    985 	nfs4_ntov_table_free(&ntov, &sarg);
    986 
    987 	return (status);
    988 }
    989 
    990 /* ARGSUSED */
    991 void
    992 mds_op_secinfonn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    993     compound_state_t *cs)
    994 {
    995 	SECINFO_NO_NAME4res *respnn;
    996 	int dotdot;
    997 
    998 	DTRACE_NFSV4_1(op__secinfo__no__name__start,
    999 	    struct compound_state *, cs);
   1000 
   1001 	respnn = &resop->nfs_resop4_u.opsecinfo_no_name;
   1002 
   1003 	/*
   1004 	 * Current file handle (cfh) should have been set before
   1005 	 * getting into this function. If not, return error.
   1006 	 */
   1007 	if (cs->vp == NULL) {
   1008 		*cs->statusp = respnn->status = NFS4ERR_NOFILEHANDLE;
   1009 		goto final;
   1010 	}
   1011 
   1012 	dotdot =
   1013 	    (argop->nfs_argop4_u.opsecinfo_no_name == SECINFO_STYLE4_PARENT);
   1014 
   1015 	*cs->statusp = respnn->status = do_rfs4_op_secinfo(cs, NULL,
   1016 	    dotdot, (SECINFO4res *)respnn);
   1017 
   1018 final:
   1019 	DTRACE_NFSV4_2(op__secinfo__no__name__done,
   1020 	    struct compound_state *, cs,
   1021 	    SECINFO_NO_NAME4res *, respnn);
   1022 }
   1023 
   1024 /* ARGSUSED */
   1025 void
   1026 mds_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1027     compound_state_t *cs)
   1028 {
   1029 	SECINFO4res *resp;
   1030 	utf8string *utfnm;
   1031 	uint_t len, dotdot;
   1032 	char *nm;
   1033 
   1034 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
   1035 
   1036 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
   1037 	    SECINFO4args *, args);
   1038 
   1039 	resp = &resop->nfs_resop4_u.opsecinfo;
   1040 
   1041 	/*
   1042 	 * Current file handle (cfh) should have been set before
   1043 	 * getting into this function. If not, return error.
   1044 	 */
   1045 	if (cs->vp == NULL) {
   1046 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1047 		goto final;
   1048 	}
   1049 	if (cs->vp->v_type != VDIR) {
   1050 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1051 		goto final;
   1052 	}
   1053 
   1054 	/*
   1055 	 * Verify the component name. If failed, error out, but
   1056 	 * do not error out if the component name is a "..".
   1057 	 * SECINFO will return its parents secinfo data for SECINFO "..".
   1058 	 */
   1059 	utfnm = &argop->nfs_argop4_u.opsecinfo.name;
   1060 	if (!utf8_dir_verify(utfnm)) {
   1061 		if (utfnm->utf8string_len != 2 ||
   1062 		    utfnm->utf8string_val[0] != '.' ||
   1063 		    utfnm->utf8string_val[1] != '.') {
   1064 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1065 			goto final;
   1066 		}
   1067 		dotdot = 1;
   1068 	} else
   1069 		dotdot = 0;
   1070 
   1071 	nm = utf8_to_str(utfnm, &len, NULL);
   1072 	if (nm == NULL) {
   1073 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1074 		goto final;
   1075 	}
   1076 
   1077 	if (len > MAXNAMELEN) {
   1078 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1079 		kmem_free(nm, len);
   1080 		goto final;
   1081 	}
   1082 
   1083 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, dotdot, resp);
   1084 
   1085 	kmem_free(nm, len);
   1086 
   1087 final:
   1088 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
   1089 	    SECINFO4res *, resp);
   1090 }
   1091 
/*
 * verify and nverify are exactly the same, except that nverify
 * succeeds when some argument changed, and verify succeeds when
 * none changed.
 */
   1097 
   1098 /* ARGSUSED */
   1099 void
   1100 mds_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1101     compound_state_t *cs)
   1102 {
   1103 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
   1104 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
   1105 	int error;
   1106 	struct nfs4_svgetit_arg sarg;
   1107 	struct statvfs64 sb;
   1108 	struct nfs4_ntov_table ntov;
   1109 
   1110 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
   1111 	    VERIFY4args *, args);
   1112 
   1113 	if (cs->vp == NULL) {
   1114 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1115 		goto final;
   1116 	}
   1117 
   1118 	sarg.sbp = &sb;
   1119 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1120 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   1121 	    &sarg, &ntov, NFS4ATTR_VERIT);
   1122 	if (resp->status != NFS4_OK) {
   1123 		/*
   1124 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   1125 		 * so could return -1 for "no match".
   1126 		 */
   1127 		if (resp->status == -1)
   1128 			resp->status = NFS4ERR_NOT_SAME;
   1129 		goto done;
   1130 	}
   1131 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   1132 	switch (error) {
   1133 	case 0:
   1134 		resp->status = NFS4_OK;
   1135 		break;
   1136 	case -1:
   1137 		resp->status = NFS4ERR_NOT_SAME;
   1138 		break;
   1139 	default:
   1140 		resp->status = puterrno4(error);
   1141 		break;
   1142 	}
   1143 done:
   1144 	*cs->statusp = resp->status;
   1145 	nfs4_ntov_table_free(&ntov, &sarg);
   1146 
   1147 final:
   1148 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
   1149 	    VERIFY4res *, resp);
   1150 }
   1151 
   1152 /* ARGSUSED */
   1153 void
   1154 mds_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1155     compound_state_t *cs)
   1156 {
   1157 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
   1158 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
   1159 	int error;
   1160 	struct nfs4_svgetit_arg sarg;
   1161 	struct statvfs64 sb;
   1162 	struct nfs4_ntov_table ntov;
   1163 
   1164 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
   1165 	    NVERIFY4args *, args);
   1166 
   1167 	if (cs->vp == NULL) {
   1168 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1169 		goto final;
   1170 	}
   1171 	sarg.sbp = &sb;
   1172 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1173 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   1174 	    &sarg, &ntov, NFS4ATTR_VERIT);
   1175 	if (resp->status != NFS4_OK) {
   1176 		/*
   1177 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   1178 		 * so could return -1 for "no match".
   1179 		 */
   1180 		if (resp->status == -1)
   1181 			resp->status = NFS4_OK;
   1182 		goto done;
   1183 	}
   1184 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   1185 	switch (error) {
   1186 	case 0:
   1187 		resp->status = NFS4ERR_SAME;
   1188 		break;
   1189 	case -1:
   1190 		resp->status = NFS4_OK;
   1191 		break;
   1192 	default:
   1193 		resp->status = puterrno4(error);
   1194 		break;
   1195 	}
   1196 done:
   1197 	*cs->statusp = resp->status;
   1198 	nfs4_ntov_table_free(&ntov, &sarg);
   1199 
   1200 final:
   1201 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   1202 	    NVERIFY4res *, resp);
   1203 
   1204 }
   1205 
   1206 /* ARGSUSED */
   1207 void
   1208 mds_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1209     compound_state_t *cs)
   1210 {
   1211 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
   1212 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
   1213 	int error;
   1214 	vnode_t *vp;
   1215 	struct vattr va;
   1216 	int checkwriteperm;
   1217 	cred_t *cr = cs->cr;
   1218 	bslabel_t *clabel, *slabel;
   1219 	ts_label_t *tslabel;
   1220 	boolean_t admin_low_client;
   1221 
   1222 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
   1223 	    ACCESS4args *, args);
   1224 
   1225 	if (cs->vp == NULL) {
   1226 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1227 		goto final;
   1228 	}
   1229 
   1230 	ASSERT(cr != NULL);
   1231 
   1232 	vp = cs->vp;
   1233 
   1234 	/*
   1235 	 * If the file system is exported read only, it is not appropriate
   1236 	 * to check write permissions for regular files and directories.
   1237 	 * Special files are interpreted by the client, so the underlying
   1238 	 * permissions are sent back to the client for interpretation.
   1239 	 */
   1240 	if (rdonly4(cs->exi, cs->vp, req) &&
   1241 	    (vp->v_type == VREG || vp->v_type == VDIR))
   1242 		checkwriteperm = 0;
   1243 	else
   1244 		checkwriteperm = 1;
   1245 
   1246 	/*
   1247 	 * XXX
   1248 	 * We need the mode so that we can correctly determine access
   1249 	 * permissions relative to a mandatory lock file.  Access to
   1250 	 * mandatory lock files is denied on the server, so it might
   1251 	 * as well be reflected to the server during the open.
   1252 	 */
   1253 	va.va_mask = AT_MODE;
   1254 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
   1255 	if (error) {
   1256 		*cs->statusp = resp->status = puterrno4(error);
   1257 		goto final;
   1258 	}
   1259 	resp->access = 0;
   1260 	resp->supported = 0;
   1261 
   1262 	if (is_system_labeled()) {
   1263 		ASSERT(req->rq_label != NULL);
   1264 		clabel = req->rq_label;
   1265 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
   1266 		    "got client label from request(1)",
   1267 		    struct svc_req *, req);
   1268 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   1269 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
   1270 				*cs->statusp = resp->status = puterrno4(EACCES);
   1271 				goto final;
   1272 			}
   1273 			slabel = label2bslabel(tslabel);
   1274 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
   1275 			    char *, "got server label(1) for vp(2)",
   1276 			    bslabel_t *, slabel, vnode_t *, vp);
   1277 
   1278 			admin_low_client = B_FALSE;
   1279 		} else
   1280 			admin_low_client = B_TRUE;
   1281 	}
   1282 
   1283 	if (args->access & ACCESS4_READ) {
   1284 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
   1285 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1286 		    (!is_system_labeled() || admin_low_client ||
   1287 		    bldominates(clabel, slabel)))
   1288 			resp->access |= ACCESS4_READ;
   1289 		resp->supported |= ACCESS4_READ;
   1290 	}
   1291 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
   1292 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1293 		if (!error && (!is_system_labeled() || admin_low_client ||
   1294 		    bldominates(clabel, slabel)))
   1295 			resp->access |= ACCESS4_LOOKUP;
   1296 		resp->supported |= ACCESS4_LOOKUP;
   1297 	}
   1298 	if (checkwriteperm &&
   1299 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
   1300 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1301 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1302 		    (!is_system_labeled() || admin_low_client ||
   1303 		    blequal(clabel, slabel)))
   1304 			resp->access |=
   1305 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
   1306 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
   1307 	}
   1308 
   1309 	if (checkwriteperm &&
   1310 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
   1311 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1312 		if (!error && (!is_system_labeled() || admin_low_client ||
   1313 		    blequal(clabel, slabel)))
   1314 			resp->access |= ACCESS4_DELETE;
   1315 		resp->supported |= ACCESS4_DELETE;
   1316 	}
   1317 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
   1318 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1319 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1320 		    (!is_system_labeled() || admin_low_client ||
   1321 		    bldominates(clabel, slabel)))
   1322 			resp->access |= ACCESS4_EXECUTE;
   1323 		resp->supported |= ACCESS4_EXECUTE;
   1324 	}
   1325 
   1326 	if (is_system_labeled() && !admin_low_client)
   1327 		label_rele(tslabel);
   1328 
   1329 	*cs->statusp = resp->status = NFS4_OK;
   1330 
   1331 final:
   1332 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
   1333 	    ACCESS4res *, resp);
   1334 }
   1335 
   1336 /* ARGSUSED */
   1337 static void
   1338 mds_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1339 	compound_state_t *cs)
   1340 {
   1341 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
   1342 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
   1343 	int error;
   1344 	vnode_t *vp = cs->vp;
   1345 	cred_t *cr = cs->cr;
   1346 	vattr_t va;
   1347 	caller_context_t ct;
   1348 
   1349 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
   1350 	    COMMIT4args *, args);
   1351 
   1352 	if (vp == NULL) {
   1353 		/*
   1354 		 * XXX kludge: fake the commit if we are a data server
   1355 		 * This will be replaced once we have nnop_commit().
   1356 		 */
   1357 		if (cs->nn != NULL) {
   1358 			*cs->statusp = resp->status = NFS4_OK;
   1359 			resp->writeverf = cs->instp->Write4verf;
   1360 		} else {
   1361 			*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1362 		}
   1363 		goto final;
   1364 	}
   1365 	if (cs->access == CS_ACCESS_DENIED) {
   1366 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1367 		goto final;
   1368 	}
   1369 
   1370 	if (args->offset + args->count < args->offset) {
   1371 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1372 		goto final;
   1373 	}
   1374 
   1375 	ct.cc_sysid = 0;
   1376 	ct.cc_pid = 0;
   1377 	ct.cc_caller_id = cs->instp->caller_id;
   1378 	ct.cc_flags = CC_DONTBLOCK;
   1379 
   1380 	va.va_mask = AT_UID;
   1381 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
   1382 
   1383 	/*
   1384 	 * If we can't get the attributes, then we can't do the
   1385 	 * right access checking.  So, we'll fail the request.
   1386 	 */
   1387 	if (error) {
   1388 		*cs->statusp = resp->status = puterrno4(error);
   1389 		goto final;
   1390 	}
   1391 	if (rdonly4(cs->exi, cs->vp, req)) {
   1392 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1393 		goto final;
   1394 	}
   1395 
   1396 	if (vp->v_type != VREG) {
   1397 		if (vp->v_type == VDIR)
   1398 			resp->status = NFS4ERR_ISDIR;
   1399 		else
   1400 			resp->status = NFS4ERR_INVAL;
   1401 		*cs->statusp = resp->status;
   1402 		goto final;
   1403 	}
   1404 
   1405 	if (crgetuid(cr) != va.va_uid &&
   1406 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, &ct))) {
   1407 		*cs->statusp = resp->status = puterrno4(error);
   1408 		goto final;
   1409 	}
   1410 
   1411 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, &ct);
   1412 	if (!error)
   1413 		error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
   1414 
   1415 	if (error) {
   1416 		*cs->statusp = resp->status = puterrno4(error);
   1417 		goto final;
   1418 	}
   1419 
   1420 	*cs->statusp = resp->status = NFS4_OK;
   1421 	resp->writeverf = cs->instp->Write4verf;
   1422 
   1423 final:
   1424 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
   1425 	    COMMIT4res *, resp);
   1426 }
   1427 
   1428 /*
   1429  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
   1430  * was completed. It does the nfsv4 create for special files.
   1431  *
   1432  * nfsv4 create is used to create non-regular files. For regular files,
   1433  * use nfsv4 open.
   1434  */
   1435 /* ARGSUSED */
   1436 static void
   1437 mds_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1438 	compound_state_t *cs)
   1439 {
   1440 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
   1441 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
   1442 	int error;
   1443 	struct vattr bva, iva, iva2, ava, *vap;
   1444 	cred_t *cr = cs->cr;
   1445 	vnode_t *dvp = cs->vp;
   1446 	vnode_t *vp = NULL;
   1447 	vnode_t *realvp;
   1448 	char *nm, *lnm;
   1449 	uint_t len, llen;
   1450 	int syncval = 0;
   1451 	struct nfs4_svgetit_arg sarg;
   1452 	struct nfs4_ntov_table ntov;
   1453 	struct statvfs64 sb;
   1454 	nfsstat4 status;
   1455 	caller_context_t ct;
   1456 
   1457 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
   1458 	    CREATE4args *, args);
   1459 
   1460 	resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1461 
   1462 	if (dvp == NULL) {
   1463 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1464 		goto final;
   1465 	}
   1466 
   1467 	/*
   1468 	 * If there is an unshared filesystem mounted on this vnode,
   1469 	 * do not allow to create an object in this directory.
   1470 	 */
   1471 	if (vn_ismntpt(dvp)) {
   1472 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1473 		goto final;
   1474 	}
   1475 
   1476 	ct.cc_sysid = 0;
   1477 	ct.cc_pid = 0;
   1478 	ct.cc_caller_id = cs->instp->caller_id;
   1479 	ct.cc_flags = CC_DONTBLOCK;
   1480 
   1481 	/* Verify that type is correct */
   1482 	switch (args->type) {
   1483 	case NF4LNK:
   1484 	case NF4BLK:
   1485 	case NF4CHR:
   1486 	case NF4SOCK:
   1487 	case NF4FIFO:
   1488 	case NF4DIR:
   1489 		break;
   1490 	default:
   1491 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1492 		goto final;
   1493 	};
   1494 
   1495 	if (cs->access == CS_ACCESS_DENIED) {
   1496 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1497 		goto final;
   1498 	}
   1499 	if (dvp->v_type != VDIR) {
   1500 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1501 		goto final;
   1502 	}
   1503 	if (!utf8_dir_verify(&args->objname)) {
   1504 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1505 		goto final;
   1506 	}
   1507 
   1508 	if (rdonly4(cs->exi, cs->vp, req)) {
   1509 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1510 		goto final;
   1511 	}
   1512 
   1513 	/*
   1514 	 * Name of newly created object
   1515 	 */
   1516 	nm = utf8_to_fn(&args->objname, &len, NULL);
   1517 	if (nm == NULL) {
   1518 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1519 		goto final;
   1520 	}
   1521 
   1522 	if (len > MAXNAMELEN) {
   1523 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1524 		kmem_free(nm, len);
   1525 		goto final;
   1526 	}
   1527 
   1528 	sarg.sbp = &sb;
   1529 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1530 
   1531 	status = do_rfs4_set_attrs(&resp->attrset,
   1532 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
   1533 
   1534 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
   1535 		status = NFS4ERR_INVAL;
   1536 
   1537 	if (status != NFS4_OK) {
   1538 		*cs->statusp = resp->status = status;
   1539 		kmem_free(nm, len);
   1540 		nfs4_ntov_table_free(&ntov, &sarg);
   1541 
   1542 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1543 		goto final;
   1544 	}
   1545 
   1546 	/* Get "before" change value */
   1547 	bva.va_mask = AT_CTIME|AT_SEQ;
   1548 	error = VOP_GETATTR(dvp, &bva, 0, cr, &ct);
   1549 	if (error) {
   1550 		*cs->statusp = resp->status = puterrno4(error);
   1551 		kmem_free(nm, len);
   1552 		nfs4_ntov_table_free(&ntov, &sarg);
   1553 
   1554 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1555 		goto final;
   1556 	}
   1557 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
   1558 
   1559 	vap = sarg.vap;
   1560 
   1561 	/*
   1562 	 * Set default initial values for attributes when not specified
   1563 	 * in createattrs.
   1564 	 */
   1565 	if ((vap->va_mask & AT_UID) == 0) {
   1566 		vap->va_uid = crgetuid(cr);
   1567 		vap->va_mask |= AT_UID;
   1568 	}
   1569 	if ((vap->va_mask & AT_GID) == 0) {
   1570 		vap->va_gid = crgetgid(cr);
   1571 		vap->va_mask |= AT_GID;
   1572 	}
   1573 
   1574 	vap->va_mask |= AT_TYPE;
   1575 	switch (args->type) {
   1576 	case NF4DIR:
   1577 		vap->va_type = VDIR;
   1578 		if ((vap->va_mask & AT_MODE) == 0) {
   1579 			vap->va_mode = 0700;	/* default: owner rwx only */
   1580 			vap->va_mask |= AT_MODE;
   1581 		}
   1582 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr, &ct, 0, NULL);
   1583 		if (error)
   1584 			break;
   1585 
   1586 		/*
   1587 		 * Get the initial "after" sequence number, if it fails,
   1588 		 * set to zero
   1589 		 */
   1590 		iva.va_mask = AT_SEQ;
   1591 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1592 			iva.va_seq = 0;
   1593 		break;
   1594 	case NF4LNK:
   1595 		vap->va_type = VLNK;
   1596 		if ((vap->va_mask & AT_MODE) == 0) {
   1597 			vap->va_mode = 0700;	/* default: owner rwx only */
   1598 			vap->va_mask |= AT_MODE;
   1599 		}
   1600 
   1601 		/*
   1602 		 * symlink names must be treated as data
   1603 		 */
   1604 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
   1605 
   1606 		if (lnm == NULL) {
   1607 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1608 			kmem_free(nm, len);
   1609 			nfs4_ntov_table_free(&ntov, &sarg);
   1610 			resp->attrset =
   1611 			    NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1612 			goto final;
   1613 		}
   1614 
   1615 		if (llen > MAXPATHLEN) {
   1616 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1617 			kmem_free(nm, len);
   1618 			kmem_free(lnm, llen);
   1619 			nfs4_ntov_table_free(&ntov, &sarg);
   1620 			resp->attrset =
   1621 			    NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1622 			goto final;
   1623 		}
   1624 
   1625 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, &ct, 0);
   1626 		if (lnm != NULL)
   1627 			kmem_free(lnm, llen);
   1628 		if (error)
   1629 			break;
   1630 
   1631 		/*
   1632 		 * Get the initial "after" sequence number, if it fails,
   1633 		 * set to zero
   1634 		 */
   1635 		iva.va_mask = AT_SEQ;
   1636 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1637 			iva.va_seq = 0;
   1638 
   1639 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr,
   1640 		    &ct, 0, NULL);
   1641 		if (error)
   1642 			break;
   1643 
   1644 		/*
   1645 		 * va_seq is not safe over VOP calls, check it again
   1646 		 * if it has changed zero out iva to force atomic = FALSE.
   1647 		 */
   1648 		iva2.va_mask = AT_SEQ;
   1649 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, &ct) ||
   1650 		    iva2.va_seq != iva.va_seq)
   1651 			iva.va_seq = 0;
   1652 		break;
   1653 	default:
   1654 		/*
   1655 		 * probably a special file.
   1656 		 */
   1657 		if ((vap->va_mask & AT_MODE) == 0) {
   1658 			vap->va_mode = 0600;	/* default: owner rw only */
   1659 			vap->va_mask |= AT_MODE;
   1660 		}
   1661 		syncval = FNODSYNC;
   1662 		/*
   1663 		 * We know this will only generate one VOP call
   1664 		 */
   1665 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
   1666 
   1667 		if (vp == NULL) {
   1668 			kmem_free(nm, len);
   1669 			nfs4_ntov_table_free(&ntov, &sarg);
   1670 			resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1671 			goto final;
   1672 		}
   1673 
   1674 		/*
   1675 		 * Get the initial "after" sequence number, if it fails,
   1676 		 * set to zero
   1677 		 */
   1678 		iva.va_mask = AT_SEQ;
   1679 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1680 			iva.va_seq = 0;
   1681 
   1682 		break;
   1683 	}
   1684 	kmem_free(nm, len);
   1685 
   1686 	if (error) {
   1687 		*cs->statusp = resp->status = puterrno4(error);
   1688 	}
   1689 
   1690 	/*
   1691 	 * Force modified data and metadata out to stable storage.
   1692 	 */
   1693 	(void) VOP_FSYNC(dvp, 0, cr, &ct);
   1694 
   1695 	if (resp->status != NFS4_OK) {
   1696 		if (vp != NULL)
   1697 			VN_RELE(vp);
   1698 		nfs4_ntov_table_free(&ntov, &sarg);
   1699 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1700 		goto final;
   1701 	}
   1702 
   1703 	/*
   1704 	 * Finish setup of cinfo response, "before" value already set.
   1705 	 * Get "after" change value, if it fails, simply return the
   1706 	 * before value.
   1707 	 */
   1708 	ava.va_mask = AT_CTIME|AT_SEQ;
   1709 	if (VOP_GETATTR(dvp, &ava, 0, cr, &ct)) {
   1710 		ava.va_ctime = bva.va_ctime;
   1711 		ava.va_seq = 0;
   1712 	}
   1713 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
   1714 
   1715 	/*
   1716 	 * True verification that object was created with correct
   1717 	 * attrs is impossible.  The attrs could have been changed
   1718 	 * immediately after object creation.  If attributes did
   1719 	 * not verify, the only recourse for the server is to
   1720 	 * destroy the object.  Maybe if some attrs (like gid)
   1721 	 * are set incorrectly, the object should be destroyed;
   1722 	 * however, seems bad as a default policy.  Do we really
   1723 	 * want to destroy an object over one of the times not
   1724 	 * verifying correctly?  For these reasons, the server
   1725 	 * currently sets bits in attrset for createattrs
   1726 	 * that were set; however, no verification is done.
   1727 	 *
   1728 	 * vmask_to_nmask accounts for vattr bits set on create
   1729 	 *	[do_rfs4_set_attrs() only sets resp bits for
   1730 	 *	 non-vattr/vfs bits.]
   1731 	 * Mask off any bits set by default so as not to return
   1732 	 * more attrset bits than were requested in createattrs
   1733 	 */
   1734 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset,
   1735 	    RFS4_ATTRVERS(cs));
   1736 	ATTRMAP_MASK(resp->attrset, args->createattrs.attrmask);
   1737 	nfs4_ntov_table_free(&ntov, &sarg);
   1738 
   1739 	error = mknfs41_fh(&cs->fh, vp, cs->exi);
   1740 	if (error) {
   1741 		*cs->statusp = resp->status = puterrno4(error);
   1742 	}
   1743 
   1744 	/*
   1745 	 * The cinfo.atomic = TRUE only if we got no errors, we have
   1746 	 * non-zero va_seq's, and it has incremented by exactly one
   1747 	 * during the creation and it didn't change during the VOP_LOOKUP
   1748 	 * or VOP_FSYNC.
   1749 	 */
   1750 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
   1751 	    iva.va_seq == (bva.va_seq + 1) &&
   1752 	    iva.va_seq == ava.va_seq)
   1753 		resp->cinfo.atomic = TRUE;
   1754 	else
   1755 		resp->cinfo.atomic = FALSE;
   1756 
   1757 	/*
   1758 	 * Force modified metadata out to stable storage.
   1759 	 *
   1760 	 * if a underlying vp exists, pass it to VOP_FSYNC
   1761 	 */
   1762 	if (VOP_REALVP(vp, &realvp, &ct) == 0)
   1763 		(void) VOP_FSYNC(realvp, syncval, cr, &ct);
   1764 	else
   1765 		(void) VOP_FSYNC(vp, syncval, cr, &ct);
   1766 
   1767 	if (resp->status != NFS4_OK) {
   1768 		VN_RELE(vp);
   1769 		goto final;
   1770 	}
   1771 	if (cs->vp)
   1772 		VN_RELE(cs->vp);
   1773 
   1774 	cs->vp = vp;
   1775 	*cs->statusp = resp->status = NFS4_OK;
   1776 
   1777 final:
   1778 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
   1779 	    CREATE4res *, resp);
   1780 }
   1781 
   1782 
   1783 /*ARGSUSED*/
   1784 static void
   1785 mds_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1786 	compound_state_t *cs)
   1787 {
   1788 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
   1789 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
   1790 	rfs4_deleg_state_t *dsp;
   1791 	nfsstat4 status;
   1792 
   1793 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
   1794 	    DELEGRETURN4args *, args);
   1795 
   1796 	status = rfs4_get_deleg_state(cs, &args->deleg_stateid, &dsp);
   1797 	resp->status = *cs->statusp = status;
   1798 	if (status != NFS4_OK)
   1799 		goto final;
   1800 
   1801 	/* Ensure specified filehandle matches */
   1802 	if (cs->vp != dsp->rds_finfo->rf_vp) {
   1803 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
   1804 	} else
   1805 		rfs4_return_deleg(dsp, FALSE);
   1806 
   1807 	rfs4_update_lease(dsp->rds_client);
   1808 
   1809 	rfs4_deleg_state_rele(dsp);
   1810 
   1811 final:
   1812 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
   1813 	    DELEGRETURN4res *, resp);
   1814 }
   1815 
   1816 
   1817 
   1818 /* ARGSUSED */
   1819 static void
   1820 mds_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1821 	compound_state_t *cs)
   1822 {
   1823 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
   1824 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   1825 	struct nfs4_svgetit_arg sarg;
   1826 	struct statvfs64 sb;
   1827 	nfsstat4 status;
   1828 
   1829 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
   1830 	    GETATTR4args *, args);
   1831 
   1832 	if (cs->vp == NULL) {
   1833 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1834 		goto final;
   1835 	}
   1836 
   1837 	if (cs->access == CS_ACCESS_DENIED) {
   1838 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1839 		goto final;
   1840 	}
   1841 
   1842 	sarg.sbp = &sb;
   1843 	sarg.cs = cs;
   1844 
   1845 	status = attrmap4_to_vattrmask(&args->attr_request, &sarg);
   1846 	if (status == NFS4_OK) {
   1847 		status = bitmap4_get_sysattrs(&sarg);
   1848 		if (status == NFS4_OK)
   1849 			status = do_rfs4_op_getattr(&args->attr_request,
   1850 			    &resp->obj_attributes, &sarg);
   1851 	}
   1852 	*cs->statusp = resp->status = status;
   1853 
   1854 final:
   1855 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
   1856 	    GETATTR4res *, resp);
   1857 }
   1858 
   1859 /*ARGSUSED*/
   1860 void
   1861 mds_op_getattr_free(nfs_resop4 *resop, compound_state_t *cs)
   1862 {
   1863 	/* Common function for NFSv4.0 and NFSv4.1 */
   1864 	rfs4_op_getattr_free(resop);
   1865 }
   1866 
   1867 /* ARGSUSED */
   1868 static void
   1869 mds_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1870 	compound_state_t *cs)
   1871 {
   1872 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   1873 
   1874 	DTRACE_NFSV4_1(op__getfh__start,
   1875 	    struct compound_state *, cs);
   1876 
   1877 	if (cs->vp == NULL) {
   1878 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1879 		goto final;
   1880 	}
   1881 	if (cs->access == CS_ACCESS_DENIED) {
   1882 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1883 		goto final;
   1884 	}
   1885 
   1886 	resp->object.nfs_fh4_val =
   1887 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
   1888 	nfs_fh4_copy(&cs->fh, &resp->object);
   1889 	*cs->statusp = resp->status = NFS4_OK;
   1890 
   1891 final:
   1892 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
   1893 	    GETFH4res *, resp);
   1894 }
   1895 
   1896 /*ARGSUSED*/
   1897 static void
   1898 mds_op_getfh_free(nfs_resop4 *resop, compound_state_t *cs)
   1899 {
   1900 	/* Common function for NFSv4.0 and NFSv4.1 */
   1901 	rfs4_op_getfh_free(resop);
   1902 }
   1903 
   1904 /*
   1905  * link: args: SAVED_FH: file, CURRENT_FH: target directory
   1906  *	 res: status. If success - CURRENT_FH unchanged, return change_info
   1907  */
   1908 /* ARGSUSED */
   1909 static void
   1910 mds_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1911 	compound_state_t *cs)
   1912 {
   1913 	LINK4args *args = &argop->nfs_argop4_u.oplink;
   1914 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
   1915 	int error;
   1916 	vnode_t *vp;
   1917 	vnode_t *dvp;
   1918 	struct vattr bdva, idva, adva;
   1919 	char *nm;
   1920 	uint_t  len;
   1921 	caller_context_t ct;
   1922 
   1923 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
   1924 	    LINK4args *, args);
   1925 
   1926 	/* SAVED_FH: source object */
   1927 	vp = cs->saved_vp;
   1928 	if (vp == NULL) {
   1929 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1930 		goto final;
   1931 	}
   1932 
   1933 	/* CURRENT_FH: target directory */
   1934 	dvp = cs->vp;
   1935 	if (dvp == NULL) {
   1936 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1937 		goto final;
   1938 	}
   1939 
   1940 	/*
   1941 	 * If there is a non-shared filesystem mounted on this vnode,
   1942 	 * do not allow to link any file in this directory.
   1943 	 */
   1944 	if (vn_ismntpt(dvp)) {
   1945 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1946 		goto final;
   1947 	}
   1948 
   1949 	if (cs->access == CS_ACCESS_DENIED) {
   1950 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1951 		goto final;
   1952 	}
   1953 
   1954 	/* Check source object's type validity */
   1955 	if (vp->v_type == VDIR) {
   1956 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   1957 		goto final;
   1958 	}
   1959 
   1960 	/* Check target directory's type */
   1961 	if (dvp->v_type != VDIR) {
   1962 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1963 		goto final;
   1964 	}
   1965 
   1966 	if (cs->saved_exi != cs->exi) {
   1967 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   1968 		goto final;
   1969 	}
   1970 
   1971 	if (!utf8_dir_verify(&args->newname)) {
   1972 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1973 		goto final;
   1974 	}
   1975 
   1976 	nm = utf8_to_fn(&args->newname, &len, NULL);
   1977 	if (nm == NULL) {
   1978 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1979 		goto final;
   1980 	}
   1981 
   1982 	if (len > MAXNAMELEN) {
   1983 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1984 		kmem_free(nm, len);
   1985 		goto final;
   1986 	}
   1987 
   1988 	if (rdonly4(cs->exi, cs->vp, req)) {
   1989 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1990 		kmem_free(nm, len);
   1991 		goto final;
   1992 	}
   1993 
   1994 	ct.cc_sysid = 0;
   1995 	ct.cc_pid = 0;
   1996 	ct.cc_caller_id = cs->instp->caller_id;
   1997 	ct.cc_flags = CC_DONTBLOCK;
   1998 
   1999 	/* Get "before" change value */
   2000 	bdva.va_mask = AT_CTIME|AT_SEQ;
   2001 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct);
   2002 	if (error) {
   2003 		*cs->statusp = resp->status = puterrno4(error);
   2004 		kmem_free(nm, len);
   2005 		goto final;
   2006 	}
   2007 
   2008 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   2009 
   2010 	error = VOP_LINK(dvp, vp, nm, cs->cr, &ct, 0);
   2011 
   2012 	kmem_free(nm, len);
   2013 
   2014 	/*
   2015 	 * Get the initial "after" sequence number, if it fails, set to zero
   2016 	 */
   2017 	idva.va_mask = AT_SEQ;
   2018 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct))
   2019 		idva.va_seq = 0;
   2020 
   2021 	/*
   2022 	 * Force modified data and metadata out to stable storage.
   2023 	 */
   2024 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, &ct);
   2025 	(void) VOP_FSYNC(dvp, 0, cs->cr, &ct);
   2026 
   2027 	if (error) {
   2028 		*cs->statusp = resp->status = puterrno4(error);
   2029 		goto final;
   2030 	}
   2031 
   2032 	/*
   2033 	 * Get "after" change value, if it fails, simply return the
   2034 	 * before value.
   2035 	 */
   2036 	adva.va_mask = AT_CTIME|AT_SEQ;
   2037 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) {
   2038 		adva.va_ctime = bdva.va_ctime;
   2039 		adva.va_seq = 0;
   2040 	}
   2041 
   2042 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   2043 
   2044 	/*
   2045 	 * The cinfo.atomic = TRUE only if we have
   2046 	 * non-zero va_seq's, and it has incremented by exactly one
   2047 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
   2048 	 */
   2049 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   2050 	    idva.va_seq == (bdva.va_seq + 1) &&
   2051 	    idva.va_seq == adva.va_seq)
   2052 		resp->cinfo.atomic = TRUE;
   2053 	else
   2054 		resp->cinfo.atomic = FALSE;
   2055 
   2056 	*cs->statusp = resp->status = NFS4_OK;
   2057 
   2058 final:
   2059 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
   2060 	    LINK4res *, resp);
   2061 }
   2062 
   2063 /*
   2064  * Used by mds_op_lookup and mds_op_lookupp to do the actual work.
   2065  */
   2066 
   2067 /* ARGSUSED */
   2068 static nfsstat4
   2069 mds_do_lookup(char *nm, uint_t buflen, struct svc_req *req,
   2070 	struct compound_state *cs)
   2071 {
   2072 	int error;
   2073 	int different_export = 0;
   2074 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
   2075 	struct exportinfo *exi = NULL, *pre_exi = NULL;
   2076 	nfsstat4 stat;
   2077 	fid_t fid;
   2078 	int attrdir, dotdot, walk;
   2079 	bool_t is_newvp = FALSE;
   2080 	caller_context_t ct;
   2081 	nfs41_fh_fmt_t *fhp;
   2082 
   2083 	fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val;
   2084 
   2085 	attrdir = ((cs->vp->v_flag & V_XATTRDIR) == V_XATTRDIR)
   2086 	    ? FH41_ATTRDIR : 0;
   2087 
   2088 	ASSERT(FH41_GET_FLAG(fhp, FH41_ATTRDIR) == attrdir);
   2089 
   2090 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
   2091 
   2092 	/*
   2093 	 * If dotdotting, then need to check whether it's
   2094 	 * above the root of a filesystem, or above an
   2095 	 * export point.
   2096 	 */
   2097 	if (dotdot) {
   2098 
   2099 		/*
   2100 		 * If dotdotting at the root of a filesystem, then
   2101 		 * need to traverse back to the mounted-on filesystem
   2102 		 * and do the dotdot lookup there.
   2103 		 */
   2104 		if (cs->vp->v_flag & VROOT) {
   2105 
   2106 			/*
   2107 			 * If at the system root, then can
   2108 			 * go up no further.
   2109 			 */
   2110 			if (VN_CMP(cs->vp, rootdir))
   2111 				return (puterrno4(ENOENT));
   2112 
   2113 			/*
   2114 			 * Traverse back to the mounted-on filesystem
   2115 			 */
   2116 			cs->vp = untraverse(cs->vp);
   2117 
   2118 			/*
   2119 			 * Set the different_export flag so we remember
   2120 			 * to pick up a new exportinfo entry for
   2121 			 * this new filesystem.
   2122 			 */
   2123 			different_export = 1;
   2124 		} else {
   2125 
   2126 			/*
   2127 			 * If dotdotting above an export point then set
   2128 			 * the different_export to get new export info.
   2129 			 */
   2130 			different_export = nfs_exported(cs->exi, cs->vp);
   2131 		}
   2132 	}
   2133 
   2134 	ct.cc_sysid = 0;
   2135 	ct.cc_pid = 0;
   2136 	ct.cc_caller_id = cs->instp->caller_id;
   2137 	ct.cc_flags = CC_DONTBLOCK;
   2138 
   2139 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
   2140 	    &ct, 0, NULL);
   2141 	if (error)
   2142 		return (puterrno4(error));
   2143 
   2144 	/*
   2145 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
   2146 	 *
   2147 	 * XXX if the vnode is a symlink and it is not visible in
   2148 	 * a pseudo filesystem, return ENOENT (not following symlink).
   2149 	 * V4 client can not mount such symlink.
   2150 	 *
   2151 	 * In the same exported filesystem, if the security flavor used
   2152 	 * is not an explicitly shared flavor, limit the view to the visible
   2153 	 * list entries only. This is not a WRONGSEC case because it's already
   2154 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
   2155 	 */
   2156 	if (!different_export &&
   2157 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
   2158 	    cs->access & CS_ACCESS_LIMITED)) {
   2159 		if (! nfs_visible(cs->exi, vp, &different_export)) {
   2160 			VN_RELE(vp);
   2161 			return (puterrno4(ENOENT));
   2162 		}
   2163 	}
   2164 
   2165 	/*
   2166 	 * If it's a mountpoint, then traverse it.
   2167 	 */
   2168 	if (vn_ismntpt(vp)) {
   2169 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
   2170 		pre_tvp = vp;		/* save pre-traversed vnode	*/
   2171 
   2172 		/*
   2173 		 * hold pre_tvp to counteract rele by traverse.  We will
   2174 		 * need pre_tvp below if checkexport4 fails
   2175 		 */
   2176 		VN_HOLD(pre_tvp);
   2177 		tvp = vp;
   2178 		if ((error = traverse(&tvp)) != 0) {
   2179 			VN_RELE(vp);
   2180 			VN_RELE(pre_tvp);
   2181 			return (puterrno4(error));
   2182 		}
   2183 		vp = tvp;
   2184 		different_export = 1;
   2185 
   2186 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
   2187 		/*
   2188 		 * The vfsp comparison is to handle the case where
   2189 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
   2190 		 * and NFS is unaware of local fs transistions because
   2191 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
   2192 		 * the dir and the obj returned by lookup will have different
   2193 		 * vfs ptrs.
   2194 		 */
   2195 		different_export = 1;
   2196 	}
   2197 
   2198 	if (different_export) {
   2199 		bzero(&fid, sizeof (fid));
   2200 		fid.fid_len = MAXFIDSZ;
   2201 		error = vop_fid_pseudo(vp, &fid);
   2202 		if (error) {
   2203 			VN_RELE(vp);
   2204 			if (pre_tvp)
   2205 				VN_RELE(pre_tvp);
   2206 			return (puterrno4(error));
   2207 		}
   2208 
   2209 		if (dotdot)
   2210 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
   2211 		else
   2212 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
   2213 
   2214 		if (exi == NULL) {
   2215 			if (pre_tvp) {
   2216 				/*
   2217 				 * If this vnode is a mounted-on vnode,
   2218 				 * but the mounted-on file system is not
   2219 				 * exported, send back the filehandle for
   2220 				 * the mounted-on vnode, not the root of
   2221 				 * the mounted-on file system.
   2222 				 */
   2223 				VN_RELE(vp);
   2224 				vp = pre_tvp;
   2225 				exi = pre_exi;
   2226 			} else {
   2227 				VN_RELE(vp);
   2228 				return (puterrno4(EACCES));
   2229 			}
   2230 		} else if (pre_tvp) {
   2231 			/* we're done with pre_tvp now. release extra hold */
   2232 			VN_RELE(pre_tvp);
   2233 		}
   2234 
   2235 		cs->exi = exi;
   2236 
   2237 		/*
   2238 		 * Now do a checkauth4.
   2239 		 *
   2240 		 * Checking here since the client/principle may not have
   2241 		 * access to the cs->exi exported file system.
   2242 		 *
   2243 		 * If the client has access we also need to validate
   2244 		 * the principle since it may have been re-mapped.
   2245 		 *
   2246 		 * We start with a new credential as a previous call to
   2247 		 * checkauth4(), via a PUT*FH operation, wrote over cs->cr.
   2248 		 */
   2249 		crfree(cs->cr);
   2250 		cs->cr = crdup(cs->basecr);
   2251 
   2252 		if (cs->vp)
   2253 			oldvp = cs->vp;
   2254 		cs->vp = vp;
   2255 		is_newvp = TRUE;
   2256 
   2257 		stat = call_checkauth4(cs, req);
   2258 		if (stat != NFS4_OK) {
   2259 			VN_RELE(cs->vp);
   2260 			cs->vp = oldvp;
   2261 			return (stat);
   2262 		}
   2263 	}
   2264 
   2265 	/*
   2266 	 * After various NFS checks, do a label check on the path
   2267 	 * component. The label on this path should either be the
   2268 	 * global zone's label or a zone's label. We are only
   2269 	 * interested in the zone's label because exported files
   2270 	 * in global zone is accessible (though read-only) to
   2271 	 * clients. The exportability/visibility check is already
   2272 	 * done before reaching this code.
   2273 	 */
   2274 	if (is_system_labeled()) {
   2275 		bslabel_t *clabel;
   2276 
   2277 		ASSERT(req->rq_label != NULL);
   2278 		clabel = req->rq_label;
   2279 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
   2280 		    "got client label from request(1)", struct svc_req *, req);
   2281 
   2282 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   2283 			if (!do_rfs_label_check(clabel, vp,
   2284 			    DOMINANCE_CHECK, cs->exi)) {
   2285 				error = EACCES;
   2286 				goto err_out;
   2287 			}
   2288 		} else {
   2289 			/*
   2290 			 * We grant access to admin_low label clients
   2291 			 * only if the client is trusted, i.e. also
   2292 			 * running Solaris Trusted Extension.
   2293 			 */
   2294 			struct sockaddr	*ca;
   2295 			int		addr_type;
   2296 			void		*ipaddr;
   2297 			tsol_tpc_t	*tp;
   2298 
   2299 			ca = (struct sockaddr *)svc_getrpccaller(
   2300 			    req->rq_xprt)->buf;
   2301 			if (ca->sa_family == AF_INET) {
   2302 				addr_type = IPV4_VERSION;
   2303 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
   2304 			} else if (ca->sa_family == AF_INET6) {
   2305 				addr_type = IPV6_VERSION;
   2306 				ipaddr = &((struct sockaddr_in6 *)
   2307 				    ca)->sin6_addr;
   2308 			}
   2309 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
   2310 			if (tp == NULL || tp->tpc_tp.tp_doi !=
   2311 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
   2312 			    SUN_CIPSO) {
   2313 				error = EACCES;
   2314 				goto err_out;
   2315 			}
   2316 		}
   2317 	}
   2318 
   2319 	error = mknfs41_fh(&cs->fh, vp, cs->exi);
   2320 
   2321 err_out:
   2322 	if (error) {
   2323 		if (is_newvp) {
   2324 			VN_RELE(cs->vp);
   2325 			cs->vp = oldvp;
   2326 		} else
   2327 			VN_RELE(vp);
   2328 		return (puterrno4(error));
   2329 	}
   2330 
   2331 	if (!is_newvp) {
   2332 		if (cs->vp)
   2333 			VN_RELE(cs->vp);
   2334 		cs->vp = vp;
   2335 	} else if (oldvp)
   2336 		VN_RELE(oldvp);
   2337 
   2338 	/*
   2339 	 * if did lookup on attrdir and didn't lookup .., set named
   2340 	 * attr fh flag
   2341 	 */
   2342 	if (attrdir && ! dotdot)
   2343 		FH41_SET_FLAG(fhp, FH41_NAMEDATTR);
   2344 
   2345 	/* Assume false for now, open proc will set this */
   2346 	cs->mandlock = FALSE;
   2347 
   2348 	return (NFS4_OK);
   2349 }
   2350 
   2351 /* ARGSUSED */
   2352 static void
   2353 mds_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2354 	compound_state_t *cs)
   2355 {
   2356 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
   2357 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
   2358 	char *nm;
   2359 	uint_t len;
   2360 
   2361 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
   2362 	    LOOKUP4args *, args);
   2363 
   2364 	if (cs->vp == NULL) {
   2365 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2366 		goto final;
   2367 	}
   2368 
   2369 	if (cs->vp->v_type == VLNK) {
   2370 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
   2371 		goto final;
   2372 	}
   2373 
   2374 	if (cs->vp->v_type != VDIR) {
   2375 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2376 		goto final;
   2377 	}
   2378 
   2379 	if (!utf8_dir_verify(&args->objname)) {
   2380 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2381 		goto final;
   2382 	}
   2383 
   2384 	nm = utf8_to_str(&args->objname, &len, NULL);
   2385 	if (nm == NULL) {
   2386 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2387 		goto final;
   2388 	}
   2389 
   2390 	if (len > MAXNAMELEN) {
   2391 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2392 		kmem_free(nm, len);
   2393 		goto final;
   2394 	}
   2395 
   2396 	*cs->statusp = resp->status = mds_do_lookup(nm, len, req, cs);
   2397 
   2398 	kmem_free(nm, len);
   2399 
   2400 final:
   2401 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
   2402 	    LOOKUP4res *, resp);
   2403 }
   2404 
   2405 /* ARGSUSED */
   2406 static void
   2407 mds_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2408 	compound_state_t *cs)
   2409 {
   2410 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
   2411 
   2412 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
   2413 
   2414 	if (cs->vp == NULL) {
   2415 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2416 		goto final;
   2417 	}
   2418 
   2419 	if (cs->vp->v_type != VDIR) {
   2420 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2421 		goto final;
   2422 	}
   2423 
   2424 	*cs->statusp = resp->status = mds_do_lookup("..", 3, req, cs);
   2425 
   2426 	/*
   2427 	 * From NFSV4 Specification, LOOKUPP should not check for
   2428 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
   2429 	 */
   2430 	if (resp->status == NFS4ERR_WRONGSEC) {
   2431 		*cs->statusp = resp->status = NFS4_OK;
   2432 	}
   2433 
   2434 final:
   2435 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
   2436 	    LOOKUPP4res *, resp);
   2437 }
   2438 
   2439 
   2440 /*ARGSUSED2*/
   2441 static void
   2442 mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2443 	compound_state_t *cs)
   2444 {
   2445 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
   2446 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
   2447 	vnode_t		*avp = NULL;
   2448 	int		lookup_flags = LOOKUP_XATTR, error;
   2449 	int		exp_ro = 0;
   2450 	caller_context_t ct;
   2451 
   2452 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
   2453 	    OPENATTR4args *, args);
   2454 
   2455 	if (cs->vp == NULL) {
   2456 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2457 		goto final;
   2458 	}
   2459 
   2460 	/*
   2461 	 * Make a couple of checks made by copen()
   2462 	 *
   2463 	 * Check to make sure underlying fs supports xattrs.  This
   2464 	 * is required because solaris filesystem implementations
   2465 	 * (UFS/TMPFS) don't enforce the noxattr mount option
   2466 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
   2467 	 * pathconf cmd or if fs supports cmd but doesn't claim
   2468 	 * support for xattr, return NOTSUPP.  It would be better
   2469 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
   2470 	 * that cmd is not available to VOP_PATHCONF interface
   2471 	 * (it's only implemented inside pathconf syscall)...
   2472 	 *
   2473 	 * Verify permission to put attributes on files (access
   2474 	 * checks from copen).
   2475 	 */
   2476 
   2477 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
   2478 		error = ENOTSUP;
   2479 		goto error_out;
   2480 	}
   2481 
   2482 	ct.cc_sysid = 0;
   2483 	ct.cc_pid = 0;
   2484 	ct.cc_caller_id = cs->instp->caller_id;
   2485 	ct.cc_flags = CC_DONTBLOCK;
   2486 
   2487 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, &ct) != 0) &&
   2488 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, &ct) != 0) &&
   2489 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, &ct) != 0)) {
   2490 		error = EACCES;
   2491 		goto error_out;
   2492 	}
   2493 
   2494 	/*
   2495 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
   2496 	 * the file system is exported read-only -- regardless of
   2497 	 * createdir flag.  Otherwise the attrdir would be created
   2498 	 * (assuming server fs isn't mounted readonly locally).  If
   2499 	 * VOP_LOOKUP returns ENOENT in this case, the error will
   2500 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
   2501 	 * because specfs has no VOP_LOOKUP op, so the macro would
   2502 	 * return ENOSYS.  EINVAL is returned by all (current)
   2503 	 * Solaris file system implementations when any of their
   2504 	 * restrictions are violated (xattr(dir) can't have xattrdir).
   2505 	 * Returning NOTSUPP is more appropriate in this case
   2506 	 * because the object will never be able to have an attrdir.
   2507 	 */
   2508 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
   2509 		lookup_flags |= CREATE_XATTR_DIR;
   2510 
   2511 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL,
   2512 	    cs->cr, &ct, 0, NULL);
   2513 
   2514 	if (error) {
   2515 		if (error == ENOENT && args->createdir && exp_ro)
   2516 			error = EROFS;
   2517 		else if (error == EINVAL || error == ENOSYS)
   2518 			error = ENOTSUP;
   2519 		goto error_out;
   2520 	}
   2521 
   2522 	ASSERT(avp->v_flag & V_XATTRDIR);
   2523 
   2524 	error = mknfs41_fh(&cs->fh, avp, cs->exi);
   2525 
   2526 	if (error) {
   2527 		VN_RELE(avp);
   2528 		goto error_out;
   2529 	}
   2530 
   2531 	VN_RELE(cs->vp);
   2532 	cs->vp = avp;
   2533 
   2534 	/*
   2535 	 * There is no requirement for an attrdir fh flag
   2536 	 * because the attrdir has a vnode flag to distinguish
   2537 	 * it from regular (non-xattr) directories.  The
   2538 	 * FH41_ATTRDIR flag is set for future sanity checks.
   2539 	 */
   2540 	FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val, FH41_ATTRDIR);
   2541 	*cs->statusp = resp->status = NFS4_OK;
   2542 	goto final;
   2543 
   2544 error_out:
   2545 
   2546 	*cs->statusp = resp->status = puterrno4(error);
   2547 
   2548 final:
   2549 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
   2550 	    OPENATTR4res *, resp);
   2551 }
   2552 
   2553 static int
   2554 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
   2555     caller_context_t *ct)
   2556 {
   2557 	int error;
   2558 	int i;
   2559 	clock_t delaytime;
   2560 
   2561 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
   2562 
   2563 	/*
   2564 	 * Don't block on mandatory locks. If this routine returns
   2565 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
   2566 	 */
   2567 	uio->uio_fmode = FNONBLOCK;
   2568 
   2569 	for (i = 0; i < rfs4_maxlock_tries; i++) {
   2570 		if (direction == FREAD) {
   2571 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
   2572 			error = VOP_READ(vp, uio, ioflag, cred, ct);
   2573 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
   2574 		} else {
   2575 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
   2576 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
   2577 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
   2578 		}
   2579 
   2580 		if (error != EAGAIN)
   2581 			break;
   2582 
   2583 		if (i < rfs4_maxlock_tries - 1) {
   2584 			delay(delaytime);
   2585 			delaytime *= 2;
   2586 		}
   2587 	}
   2588 
   2589 	return (error);
   2590 }
   2591 
   2592 /* ARGSUSED */
   2593 static void
   2594 mds_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2595 	compound_state_t *cs)
   2596 {
   2597 	READ4args *args = &argop->nfs_argop4_u.opread;
   2598 	READ4res *resp = &resop->nfs_resop4_u.opread;
   2599 	int error;
   2600 	nnode_t *nn = NULL;
   2601 	struct iovec iov;
   2602 	struct uio uio;
   2603 	bool_t *deleg = &cs->deleg;
   2604 	nfsstat4 stat;
   2605 	mblk_t *mp;
   2606 	int alloc_err = 0;
   2607 	caller_context_t ct;
   2608 	uint32_t nnioflags = 0;
   2609 
   2610 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
   2611 	    READ4args, args);
   2612 
   2613 	nn = cs->nn;
   2614 	if (nn == NULL) {
   2615 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2616 		goto final;
   2617 	}
   2618 	if (cs->access == CS_ACCESS_DENIED) {
   2619 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2620 		goto final;
   2621 	}
   2622 
   2623 	if ((stat = nnop_check_stateid(nn, cs, FREAD, &args->stateid,
   2624 	    FALSE, deleg, TRUE, &ct, NULL)) != NFS4_OK) {
   2625 		*cs->statusp = resp->status = stat;
   2626 		goto final;
   2627 	}
   2628 
   2629 	error = nnop_io_prep(nn, &nnioflags, cs->cr, &ct, args->offset,
   2630 	    args->count, NULL);
   2631 	if (error != 0) {
   2632 		*cs->statusp = resp->status = nnode_stat4(error, 1);
   2633 		goto out;
   2634 	}
   2635 
   2636 	if (nnioflags & NNODE_IO_FLAG_PAST_EOF) {
   2637 		*cs->statusp = resp->status = NFS4_OK;
   2638 		resp->eof = TRUE;
   2639 		resp->data_len = 0;
   2640 		resp->data_val = NULL;
   2641 		resp->mblk = NULL;
   2642 		*cs->statusp = resp->status = NFS4_OK;
   2643 		goto out;
   2644 	}
   2645 
   2646 	if (args->count == 0) {
   2647 		*cs->statusp = resp->status = NFS4_OK;
   2648 		resp->eof = FALSE;
   2649 		resp->data_len = 0;
   2650 		resp->data_val = NULL;
   2651 		resp->mblk = NULL;
   2652 		goto out;
   2653 	}
   2654 
   2655 	/*
   2656 	 * Do not allocate memory more than maximum allowed
   2657 	 * transfer size
   2658 	 */
   2659 	if (args->count > rfs4_tsize(req))
   2660 		args->count = rfs4_tsize(req);
   2661 
   2662 	if (args->wlist) {
   2663 		mp = NULL;
   2664 		(void) rdma_get_wchunk(req, &iov, args->wlist);
   2665 	} else {
   2666 		/*
   2667 		 * mp will contain the data to be sent out in the read reply.
   2668 		 * It will be freed after the reply has been sent.
   2669 		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
   2670 		 * so that the call to xdrmblk_putmblk() never fails.
   2671 		 * If the first alloc of the requested size fails, then
   2672 		 * decrease the size to something more reasonable and wait
   2673 		 * for the allocation to occur.
   2674 		 */
   2675 		mp = allocb(RNDUP(args->count), BPRI_MED);
   2676 		if (mp == NULL) {
   2677 			if (args->count > MAXBSIZE)
   2678 				args->count = MAXBSIZE;
   2679 			mp = allocb_wait(RNDUP(args->count), BPRI_MED,
   2680 			    STR_NOSIG, &alloc_err);
   2681 		}
   2682 		ASSERT(mp != NULL);
   2683 		ASSERT(alloc_err == 0);
   2684 
   2685 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
   2686 		iov.iov_len = args->count;
   2687 	}
   2688 
   2689 	uio.uio_iov = &iov;
   2690 	uio.uio_iovcnt = 1;
   2691 	uio.uio_segflg = UIO_SYSSPACE;
   2692 	uio.uio_extflg = UIO_COPY_CACHED;
   2693 	uio.uio_loffset = args->offset;
   2694 	uio.uio_resid = args->count;
   2695 
   2696 	error = nnop_read(nn, &nnioflags, cs->cr, &ct, &uio, 0);
   2697 	if (error) {
   2698 		if (mp != NULL)
   2699 			freeb(mp);
   2700 		*cs->statusp = resp->status = nnode_stat4(error, 1);
   2701 		goto out;
   2702 	}
   2703 
   2704 	*cs->statusp = resp->status = NFS4_OK;
   2705 
   2706 	ASSERT(uio.uio_resid >= 0);
   2707 	resp->data_len = args->count - uio.uio_resid;
   2708 	resp->data_val = (char *)mp->b_datap->db_base;
   2709 	resp->mblk = mp;
   2710 
   2711 	resp->eof = (nnioflags & NNODE_IO_FLAG_EOF) ? TRUE : FALSE;
   2712 
   2713 out:
   2714 	nnop_io_release(nn, nnioflags, &ct);
   2715 
   2716 final:
   2717 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
   2718 	    READ4res *, resp);
   2719 }
   2720 
   2721 /*ARGSUSED*/
   2722 static void
   2723 mds_op_read_free(nfs_resop4 *resop, compound_state_t *cs)
   2724 {
   2725 	/* Common function for NFSv4.0 and NFSv4.1 */
   2726 	rfs4_op_read_free(resop);
   2727 }
   2728 
   2729 /* ARGSUSED */
   2730 static void
   2731 mds_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2732 	compound_state_t *cs)
   2733 {
   2734 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
   2735 	int error;
   2736 	vnode_t *vp;
   2737 	struct exportinfo *exi, *sav_exi;
   2738 	nfs41_fh_fmt_t *fhp;
   2739 	fid_t exp_fid;
   2740 
   2741 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
   2742 
   2743 	if (cs->vp) {
   2744 		VN_RELE(cs->vp);
   2745 		cs->vp = NULL;
   2746 	}
   2747 
   2748 	if (cs->cr)
   2749 		crfree(cs->cr);
   2750 
   2751 	cs->cr = crdup(cs->basecr);
   2752 
   2753 	vp = exi_public->exi_vp;
   2754 	if (vp == NULL) {
   2755 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   2756 		goto final;
   2757 	}
   2758 
   2759 	error = mknfs41_fh(&cs->fh, vp, exi_public);
   2760 	if (error != 0) {
   2761 		*cs->statusp = resp->status = puterrno4(error);
   2762 		goto final;
   2763 	}
   2764 	sav_exi = cs->exi;
   2765 	if (exi_public == exi_root) {
   2766 		/*
   2767 		 * No filesystem is actually shared public, so we default
   2768 		 * to exi_root. In this case, we must check whether root
   2769 		 * is exported.
   2770 		 */
   2771 		fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val;
   2772 
   2773 		exp_fid.fid_len = fhp->fh.v1.export_fid.len;
   2774 
   2775 		bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data,
   2776 		    exp_fid.fid_len);
   2777 
   2778 		/*
   2779 		 * if root filesystem is exported, the exportinfo struct that we
   2780 		 * should use is what checkexport4 returns, because root_exi is
   2781 		 * actually a mostly empty struct.
   2782 		 */
   2783 		exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL);
   2784 		cs->exi = ((exi != NULL) ? exi : exi_public);
   2785 	} else {
   2786 		/*
   2787 		 * it's a properly shared filesystem
   2788 		 */
   2789 		cs->exi = exi_public;
   2790 	}
   2791 
   2792 	VN_HOLD(vp);
   2793 	cs->vp = vp;
   2794 
   2795 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2796 		VN_RELE(cs->vp);
   2797 		cs->vp = NULL;
   2798 		cs->exi = sav_exi;
   2799 		goto final;
   2800 	}
   2801 
   2802 	*cs->statusp = resp->status = NFS4_OK;
   2803 
   2804 final:
   2805 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
   2806 	    PUTPUBFH4res *, resp);
   2807 }
   2808 
   2809 /*
   2810  * XXX - issue with put*fh operations.
   2811  *
   2812  * let us assume that /export/home is shared via NFS and a NFS client
   2813  * wishes to mount /export/home/joe.
   2814  *
   2815  * If /export, home, or joe have restrictive search permissions, then
   2816  * the NFS Server should not return a filehandle to the client.
   2817  *
   2818  * This case is easy to enforce. However, the NFS Client does not know
   2819  * which security flavor should be used until the pathname has been
   2820  * fully resolved. In addition there is another complication for uid
   2821  * mapping. If the credential being used is root, the default behaviour
   2822  * will be to map it to the anonymous user. However the NFS Server can not
   2823  * map it until the pathname has been fully resolved.
   2824  *
   2825  * XXX: JEFF:  Proposed solution.
   2826  *
   2827  * Luckily, SECINFO uses a full pathname.  So what we will
   2828  * have to do in mds_op_lookup is check that flavor of
   2829  * the target object matches that of the request, and if root was the
   2830  * caller, check for the root= and anon= options, and if necessary,
   2831  * repeat the lookup using the right cred_t.
   2832  *
   2833  * But that's not done yet.
   2834  */
   2835 /* ARGSUSED */
   2836 static void
   2837 mds_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2838 	compound_state_t *cs)
   2839 {
   2840 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
   2841 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
   2842 	nfs41_fh_fmt_t *fhp = NULL;
   2843 	fid_t  exp_fid;
   2844 	int error;
   2845 
   2846 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
   2847 	    PUTFH4args *, args);
   2848 
   2849 	/*
   2850 	 * release the old nnode, vnode and cred.
   2851 	 */
   2852 	if (cs->nn)
   2853 		nnode_rele(&cs->nn);
   2854 	if (cs->vp) {
   2855 		VN_RELE(cs->vp);
   2856 		cs->vp = NULL;
   2857 	}
   2858 	if (cs->cr) {
   2859 		crfree(cs->cr);
   2860 		cs->cr = NULL;
   2861 	}
   2862 
   2863 
   2864 	/*
   2865 	 * Check exportinfo only if it's a FH41_TYPE_NFS filehandle.
   2866 	 * If the filehandle is otherwise incorrect,
   2867 	 * nnode_from_fh_v41() will return an error.
   2868 	 */
   2869 	fhp = (nfs41_fh_fmt_t *)args->object.nfs_fh4_val;
   2870 	if (fhp->type == FH41_TYPE_NFS) {
   2871 		exp_fid.fid_len = fhp->fh.v1.export_fid.len;
   2872 		bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data,
   2873 		    exp_fid.fid_len);
   2874 		cs->exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL);
   2875 		if (cs->exi == NULL) {
   2876 			*cs->statusp = resp->status = NFS4ERR_STALE;
   2877 			DTRACE_PROBE(nfss41__e__chkexp);
   2878 			goto final;
   2879 		}
   2880 	}
   2881 
   2882 	error = nnode_from_fh_v41(&cs->nn, &args->object);
   2883 	if (error != 0) {
   2884 		resp->status = *cs->statusp = nnode_stat4(error, 1);
   2885 		goto final;
   2886 	}
   2887 	ASSERT(cs->nn != NULL);
   2888 
   2889 	cs->vp = nnop_io_getvp(cs->nn);
   2890 
   2891 	cs->cr = crdup(cs->basecr);
   2892 	ASSERT(cs->cr != NULL);
   2893 
   2894 	if (fhp->type == FH41_TYPE_NFS) {
   2895 		if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2896 			nnode_rele(&cs->nn);
   2897 			VN_RELE(cs->vp);
   2898 			cs->vp = NULL;
   2899 			crfree(cs->cr);
   2900 			cs->cr = NULL;
   2901 			*cs->statusp = resp->status;
   2902 			DTRACE_PROBE(nfss41__e__fail_auth);
   2903 			goto final;
   2904 		}
   2905 	}
   2906 
   2907 	nfs_fh4_copy(&args->object, &cs->fh);
   2908 	*cs->statusp = resp->status = NFS4_OK;
   2909 	cs->deleg = FALSE;
   2910 
   2911 final:
   2912 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
   2913 	    PUTFH4res *, resp);
   2914 }
   2915 
   2916 /* ARGSUSED */
   2917 static void
   2918 mds_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2919 	compound_state_t *cs)
   2920 
   2921 {
   2922 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
   2923 	int error;
   2924 	fid_t fid;
   2925 	struct exportinfo *exi, *sav_exi;
   2926 
   2927 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
   2928 
   2929 	if (cs->vp) {
   2930 		VN_RELE(cs->vp);
   2931 		cs->vp = NULL;
   2932 	}
   2933 
   2934 	if (cs->cr)
   2935 		crfree(cs->cr);
   2936 
   2937 	cs->cr = crdup(cs->basecr);
   2938 
   2939 	/*
   2940 	 * Using rootdir, the system root vnode,
   2941 	 * get its fid.
   2942 	 */
   2943 	bzero(&fid, sizeof (fid));
   2944 	fid.fid_len = MAXFIDSZ;
   2945 	error = vop_fid_pseudo(rootdir, &fid);
   2946 	if (error != 0) {
   2947 		*cs->statusp = resp->status = puterrno4(error);
   2948 		goto final;
   2949 	}
   2950 
   2951 	/*
   2952 	 * Then use the root fsid & fid it to find out if it's exported
   2953 	 *
   2954 	 * If the server root isn't exported directly, then
   2955 	 * it should at least be a pseudo export based on
   2956 	 * one or more exports further down in the server's
   2957 	 * file tree.
   2958 	 */
   2959 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
   2960 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
   2961 		DTRACE_PROBE(nfss41__e__chkexp);
   2962 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   2963 		goto final;
   2964 	}
   2965 
   2966 	/*
   2967 	 * Now make a filehandle based on the root
   2968 	 * export and root vnode.
   2969 	 */
   2970 	error = mknfs41_fh(&cs->fh, rootdir, exi);
   2971 	if (error != 0) {
   2972 		*cs->statusp = resp->status = puterrno4(error);
   2973 		goto final;
   2974 	}
   2975 
   2976 	sav_exi = cs->exi;
   2977 	cs->exi = exi;
   2978 
   2979 	VN_HOLD(rootdir);
   2980 	cs->vp = rootdir;
   2981 
   2982 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2983 		VN_RELE(rootdir);
   2984 		cs->vp = NULL;
   2985 		cs->exi = sav_exi;
   2986 		goto final;
   2987 	}
   2988 
   2989 	*cs->statusp = resp->status = NFS4_OK;
   2990 	cs->deleg = FALSE;
   2991 
   2992 final:
   2993 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
   2994 	    PUTROOTFH4res *, resp);
   2995 }
   2996 
   2997 /*
   2998  * A directory entry is a valid nfsv4 entry if
   2999  * - it has a non-zero ino
   3000  * - it is not a dot or dotdot name
   3001  * - it is visible in a pseudo export or in a real export that can
   3002  *   only have a limited view.
   3003  */
   3004 static bool_t
   3005 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
   3006 		int *expseudo, int check_visible)
   3007 {
   3008 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
   3009 		*expseudo = 0;
   3010 		return (FALSE);
   3011 	}
   3012 
   3013 	if (! check_visible) {
   3014 		*expseudo = 0;
   3015 		return (TRUE);
   3016 	}
   3017 
   3018 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
   3019 }
   3020 
   3021 
   3022 /*
   3023  * readlink: args: CURRENT_FH.
   3024  *	res: status. If success - CURRENT_FH unchanged, return linktext.
   3025  */
   3026 
   3027 /* ARGSUSED */
   3028 static void
   3029 mds_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3030 	compound_state_t *cs)
   3031 {
   3032 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3033 	int error;
   3034 	vnode_t *vp;
   3035 	struct iovec iov;
   3036 	struct vattr va;
   3037 	struct uio uio;
   3038 	char *data;
   3039 	caller_context_t ct;
   3040 
   3041 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
   3042 
   3043 	/* CURRENT_FH: directory */
   3044 	vp = cs->vp;
   3045 	if (vp == NULL) {
   3046 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3047 		goto final;
   3048 	}
   3049 
   3050 	if (cs->access == CS_ACCESS_DENIED) {
   3051 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3052 		goto final;
   3053 	}
   3054 
   3055 	if (vp->v_type == VDIR) {
   3056 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   3057 		goto final;
   3058 	}
   3059 
   3060 	if (vp->v_type != VLNK) {
   3061 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3062 		goto final;
   3063 	}
   3064 
   3065 	ct.cc_sysid = 0;
   3066 	ct.cc_pid = 0;
   3067 	ct.cc_caller_id = cs->instp->caller_id;
   3068 	ct.cc_flags = CC_DONTBLOCK;
   3069 
   3070 	va.va_mask = AT_MODE;
   3071 	error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3072 	if (error) {
   3073 		*cs->statusp = resp->status = puterrno4(error);
   3074 		goto final;
   3075 	}
   3076 
   3077 	if (MANDLOCK(vp, va.va_mode)) {
   3078 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3079 		goto final;
   3080 	}
   3081 
   3082 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
   3083 
   3084 	iov.iov_base = data;
   3085 	iov.iov_len = MAXPATHLEN;
   3086 	uio.uio_iov = &iov;
   3087 	uio.uio_iovcnt = 1;
   3088 	uio.uio_segflg = UIO_SYSSPACE;
   3089 	uio.uio_extflg = UIO_COPY_CACHED;
   3090 	uio.uio_loffset = 0;
   3091 	uio.uio_resid = MAXPATHLEN;
   3092 
   3093 	error = VOP_READLINK(vp, &uio, cs->cr, &ct);
   3094 
   3095 	if (error) {
   3096 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3097 		*cs->statusp = resp->status = puterrno4(error);
   3098 		goto final;
   3099 	}
   3100 
   3101 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
   3102 
   3103 	/*
   3104 	 * treat link name as data
   3105 	 */
   3106 	(void) str_to_utf8(data, &resp->link);
   3107 
   3108 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3109 	*cs->statusp = resp->status = NFS4_OK;
   3110 
   3111 final:
   3112 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
   3113 	    READLINK4res *, resp);
   3114 }
   3115 
   3116 /*ARGSUSED*/
   3117 static void
   3118 mds_op_readlink_free(nfs_resop4 *resop, compound_state_t *cs)
   3119 {
   3120 	/* Common function used for NFSv4.0 and NFSv4.1 */
   3121 	rfs4_op_readlink_free(resop);
   3122 }
   3123 
   3124 /* ARGSUSED */
   3125 static void
   3126 mds_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
   3127     struct svc_req *req, compound_state_t *cs)
   3128 {
   3129 	RECLAIM_COMPLETE4args *args = &argop->nfs_argop4_u.opreclaim_complete;
   3130 	RECLAIM_COMPLETE4res *resp = &resop->nfs_resop4_u.opreclaim_complete;
   3131 	rfs4_client_t *cp;
   3132 
   3133 	cp = cs->cp;
   3134 
   3135 	if (cp->rc_reclaim_completed) {
   3136 		*cs->statusp = resp->rcr_status = NFS4ERR_COMPLETE_ALREADY;
   3137 		return;
   3138 	}
   3139 
   3140 	if (args->rca_one_fs) {
   3141 		/* do what?  we don't track this */
   3142 		*cs->statusp = resp->rcr_status = NFS4_OK;
   3143 		return;
   3144 	}
   3145 
   3146 	cp->rc_reclaim_completed = 1;
   3147 
   3148 	/* did we have reclaimable state stored for this client? */
   3149 	if (cp->rc_can_reclaim)
   3150 		atomic_add_32(&(cs->instp->reclaim_cnt), -1);
   3151 
   3152 	*cs->statusp = resp->rcr_status = NFS4_OK;
   3153 }
   3154 
   3155 /*
   3156  * short utility function to lookup a file and recall the delegation
   3157  */
   3158 static rfs4_file_t *
   3159 mds_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
   3160 	int *lkup_error, struct compound_state *cs)
   3161 {
   3162 	vnode_t *vp;
   3163 	rfs4_file_t *fp = NULL;
   3164 	bool_t fcreate = FALSE;
   3165 	int error;
   3166 
   3167 	if (vpp)
   3168 		*vpp = NULL;
   3169 
   3170 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
   3171 	    NULL, 0, NULL)) == 0) {
   3172 		if (vp->v_type == VREG)
   3173 			fp = rfs4_findfile(cs->instp, vp, NULL, &fcreate);
   3174 		if (vpp)
   3175 			*vpp = vp;
   3176 		else
   3177 			VN_RELE(vp);
   3178 	}
   3179 
   3180 	if (lkup_error)
   3181 		*lkup_error = error;
   3182 
   3183 	return (fp);
   3184 }
   3185 
   3186 static int
   3187 do_ctl_mds_remove(vnode_t *vp, rfs4_file_t *fp, compound_state_t *cs)
   3188 {
   3189 	fid_t fid;
   3190 	nfs41_fid_t nfs41_fid;
   3191 	int error = 0;
   3192 
   3193 	/*
   3194 	 * Use the file layout to determine which data servers to
   3195 	 * send DS_REMOVEs to.  If the layout is not cached in the
   3196 	 * rfs4_file_t either this means that we do not have a layout
   3197 	 * or it needs to be read in from disk.  Right now, we do not
   3198 	 * attempt to read the layout in from disk, but future phases
   3199 	 * of REMOVE handling will take this into consideration.
   3200 	 *
   3201 	 * Known Problems with this implementation of REMOVE:
   3202 	 * 1. Not attempting to read a layout from disk could mean
   3203 	 * that if an on-disk layout did exist, storage on the data
   3204 	 * servers will not be freed.
   3205 	 *
   3206 	 * 2. The server populates the layout stored in the rfs4_file_t
   3207 	 * when it receives a LAYOUTGET.  If the file has been written
   3208 	 * (perhaps in a past server instance), but no clients have
   3209 	 * issued new LAYOUTGETs, we will not have a cached layout and
   3210 	 * we will not free space on the data servers.
   3211 	 *
   3212 	 * 3. If any of the DS_REMOVE calls to the data servers fail
   3213 	 * the errors are ignored and will not be retried.  This may
   3214 	 * cause leaked space on the the data server.
   3215 	 */
   3216 	if (fp->rf_mlo != NULL) {
   3217 		bzero(&fid, sizeof (fid));
   3218 		fid.fid_len = MAXFIDSZ;
   3219 
   3220 		error = vop_fid_pseudo(vp, &fid);
   3221 		if (error) {
   3222 			DTRACE_NFSV4_1(nfss__e__vop_fid_pseudo_failed,
   3223 			    int, error);
   3224 			return (error);
   3225 		} else {
   3226 			nfs41_fid.len = fid.fid_len;
   3227 			bcopy(fid.fid_data, nfs41_fid.val, nfs41_fid.len);
   3228 		}
   3229 
   3230 		error = ctl_mds_clnt_remove_file(cs->instp, cs->exi->exi_fsid,
   3231 		    nfs41_fid, fp->rf_mlo);
   3232 	} else
   3233 		DTRACE_PROBE(nfss__i__layout_is_null_cannot_remove);
   3234 
   3235 	return (error);
   3236 }
   3237 
   3238 /*
   3239  * remove: args: CURRENT_FH: directory; name.
   3240  *	res: status. If success - CURRENT_FH unchanged, return change_info
   3241  *		for directory.
   3242  */
   3243 /* ARGSUSED */
   3244 static void
   3245 mds_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3246 	compound_state_t *cs)
   3247 {
   3248 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
   3249 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
   3250 	int error;
   3251 	vnode_t *dvp, *vp;
   3252 	struct vattr bdva, idva, adva;
   3253 	char *nm;
   3254 	uint_t len;
   3255 	rfs4_file_t *fp;
   3256 	int in_crit = 0;
   3257 	bslabel_t *clabel;
   3258 	caller_context_t ct;
   3259 
   3260 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
   3261 	    REMOVE4args *, args);
   3262 
   3263 	/* CURRENT_FH: directory */
   3264 	dvp = cs->vp;
   3265 	if (dvp == NULL) {
   3266 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3267 		goto final;
   3268 	}
   3269 
   3270 	if (cs->access == CS_ACCESS_DENIED) {
   3271 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3272 		goto final;
   3273 	}
   3274 
   3275 	/*
   3276 	 * If there is an unshared filesystem mounted on this vnode,
   3277 	 * Do not allow to remove anything in this directory.
   3278 	 */
   3279 	if (vn_ismntpt(dvp)) {
   3280 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3281 		goto final;
   3282 	}
   3283 
   3284 	if (dvp->v_type != VDIR) {
   3285 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   3286 		goto final;
   3287 	}
   3288 
   3289 	if (!utf8_dir_verify(&args->target)) {
   3290 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3291 		goto final;
   3292 	}
   3293 
   3294 	/*
   3295 	 * Lookup the file so that we can check if it's a directory
   3296 	 */
   3297 	nm = utf8_to_fn(&args->target, &len, NULL);
   3298 	if (nm == NULL) {
   3299 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3300 		goto final;
   3301 	}
   3302 
   3303 	if (len > MAXNAMELEN) {
   3304 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   3305 		kmem_free(nm, len);
   3306 		goto final;
   3307 	}
   3308 
   3309 	if (rdonly4(cs->exi, cs->vp, req)) {
   3310 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   3311 		kmem_free(nm, len);
   3312 		goto final;
   3313 	}
   3314 
   3315 	/*
   3316 	 * Lookup the file to determine type and while we are see if
   3317 	 * there is a file struct around and check for delegation.
   3318 	 * We don't need to acquire va_seq before this lookup, if
   3319 	 * it causes an update, cinfo.before will not match, which will
   3320 	 * trigger a cache flush even if atomic is TRUE.
   3321 	 */
   3322 	fp = mds_lookup_and_findfile(dvp, nm, &vp, &error, cs);
   3323 	if (vp != NULL) {
   3324 		if (rfs4_check_delegated(FWRITE, vp, TRUE, TRUE, TRUE, NULL)) {
   3325 			VN_RELE(vp);
   3326 			rfs4_file_rele(fp);
   3327 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   3328 			kmem_free(nm, len);
   3329 			goto final;
   3330 		}
   3331 	} else {	/* Didn't find anything to remove */
   3332 		*cs->statusp = resp->status = error;
   3333 		kmem_free(nm, len);
   3334 		goto final;
   3335 	}
   3336 
   3337 	if (nbl_need_check(vp)) {
   3338 		nbl_start_crit(vp, RW_READER);
   3339 		in_crit = 1;
   3340 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, &ct)) {
   3341 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   3342 			kmem_free(nm, len);
   3343 			nbl_end_crit(vp);
   3344 			VN_RELE(vp);
   3345 			if (fp) {
   3346 				rfs4_clear_dont_grant(cs->instp, fp);
   3347 				rfs4_file_rele(fp);
   3348 			}
   3349 			goto final;
   3350 		}
   3351 	}
   3352 
   3353 	/* check label before allowing removal */
   3354 	if (is_system_labeled()) {
   3355 		ASSERT(req->rq_label != NULL);
   3356 		clabel = req->rq_label;
   3357 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   3358 		    "got client label from request(1)",
   3359 		    struct svc_req *, req);
   3360 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3361 			if (!do_rfs_label_check(clabel, vp,
   3362 			    EQUALITY_CHECK, cs->exi)) {
   3363 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3364 				kmem_free(nm, len);
   3365 				if (in_crit)
   3366 					nbl_end_crit(vp);
   3367 				VN_RELE(vp);
   3368 				if (fp) {
   3369 					rfs4_clear_dont_grant(cs->instp, fp);
   3370 					rfs4_file_rele(fp);
   3371 				}
   3372 				goto final;
   3373 			}
   3374 		}
   3375 	}
   3376 
   3377 	ct.cc_sysid = 0;
   3378 	ct.cc_pid = 0;
   3379 	ct.cc_caller_id = cs->instp->caller_id;
   3380 	ct.cc_flags = CC_DONTBLOCK;
   3381 
   3382 	/* Get dir "before" change value */
   3383 	bdva.va_mask = AT_CTIME|AT_SEQ;
   3384 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct);
   3385 	if (error) {
   3386 		*cs->statusp = resp->status = puterrno4(error);
   3387 		kmem_free(nm, len);
   3388 		if (in_crit)
   3389 			nbl_end_crit(vp);
   3390 		VN_RELE(vp);
   3391 		if (fp) {
   3392 			rfs4_clear_dont_grant(cs->instp, fp);
   3393 			rfs4_file_rele(fp);
   3394 		}
   3395 		goto final;
   3396 	}
   3397 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   3398 
   3399 	/* Actually do the REMOVE operation */
   3400 	if (vp->v_type == VDIR) {
   3401 		/*
   3402 		 * Can't remove a directory that has a mounted-on filesystem.
   3403 		 */
   3404 		if (vn_ismntpt(vp)) {
   3405 			error = EACCES;
   3406 		} else {
   3407 			/*
   3408 			 * System V defines rmdir to return EEXIST,
   3409 			 * not * ENOTEMPTY, if the directory is not
   3410 			 * empty.  A System V NFS server needs to map
   3411 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
   3412 			 * transmit over the wire.
   3413 			 */
   3414 			if ((error = VOP_RMDIR(dvp, nm, rootdir,
   3415 			    cs->cr, &ct, 0)) == EEXIST)
   3416 				error = ENOTEMPTY;
   3417 		}
   3418 	} else {
   3419 		if ((error = VOP_REMOVE(dvp, nm, cs->cr, &ct, 0)) == 0 &&
   3420 		    fp != NULL) {
   3421 			struct vattr va;
   3422 			vnode_t *tvp;
   3423 
   3424 			rfs4_dbe_lock(fp->rf_dbe);
   3425 			tvp = fp->rf_vp;
   3426 			if (tvp)
   3427 				VN_HOLD(tvp);
   3428 			rfs4_dbe_unlock(fp->rf_dbe);
   3429 
   3430 			if (tvp) {
   3431 				/*
   3432 				 * This is va_seq safe because we are not
   3433 				 * manipulating dvp.
   3434 				 */
   3435 				va.va_mask = AT_NLINK;
   3436 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr,
   3437 				    &ct) && va.va_nlink == 0) {
   3438 					if (in_crit) {
   3439 						nbl_end_crit(vp);
   3440 						in_crit = 0;
   3441 					}
   3442 
   3443 					/* Remove the layout */
   3444 					mds_delete_layout(tvp);
   3445 
   3446 					/*
   3447 					 * Remove objects on data servers.
   3448 					 * Ignore errors for now..
   3449 					 */
   3450 					(void) do_ctl_mds_remove(tvp, fp, cs);
   3451 
   3452 					/* Remove state on file remove */
   3453 					rfs4_close_all_state(fp);
   3454 				}
   3455 				VN_RELE(tvp);
   3456 			}
   3457 		}
   3458 	}
   3459 
   3460 	if (in_crit)
   3461 		nbl_end_crit(vp);
   3462 	VN_RELE(vp);
   3463 
   3464 	if (fp) {
   3465 		rfs4_clear_dont_grant(cs->instp, fp);
   3466 		rfs4_file_rele(fp);
   3467 		fp = NULL;
   3468 	}
   3469 	kmem_free(nm, len);
   3470 
   3471 	if (error) {
   3472 		*cs->statusp = resp->status = puterrno4(error);
   3473 		goto final;
   3474 	}
   3475 
   3476 	/*
   3477 	 * Get the initial "after" sequence number, if it fails, set to zero
   3478 	 */
   3479 	idva.va_mask = AT_SEQ;
   3480 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct))
   3481 		idva.va_seq = 0;
   3482 
   3483 	/*
   3484 	 * Force modified data and metadata out to stable storage.
   3485 	 */
   3486 	(void) VOP_FSYNC(dvp, 0, cs->cr, &ct);
   3487 
   3488 	/*
   3489 	 * Get "after" change value, if it fails, simply return the
   3490 	 * before value.
   3491 	 */
   3492 	adva.va_mask = AT_CTIME|AT_SEQ;
   3493 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) {
   3494 		adva.va_ctime = bdva.va_ctime;
   3495 		adva.va_seq = 0;
   3496 	}
   3497 
   3498 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   3499 
   3500 	/*
   3501 	 * The cinfo.atomic = TRUE only if we have
   3502 	 * non-zero va_seq's, and it has incremented by exactly one
   3503 	 * during the VOP_REMOVE/RMDIR and it didn't change during
   3504 	 * the VOP_FSYNC.
   3505 	 */
   3506 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   3507 	    idva.va_seq == (bdva.va_seq + 1) &&
   3508 	    idva.va_seq == adva.va_seq)
   3509 		resp->cinfo.atomic = TRUE;
   3510 	else
   3511 		resp->cinfo.atomic = FALSE;
   3512 
   3513 	*cs->statusp = resp->status = NFS4_OK;
   3514 
   3515 final:
   3516 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
   3517 	    REMOVE4res *, resp);
   3518 }
   3519 
   3520 /*
   3521  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
   3522  *		oldname and newname.
   3523  *	res: status. If success - CURRENT_FH unchanged, return change_info
   3524  *		for both from and target directories.
   3525  */
   3526 /* ARGSUSED */
   3527 static void
   3528 mds_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3529 	compound_state_t *cs)
   3530 {
   3531 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
   3532 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
   3533 	int error;
   3534 	vnode_t *odvp;
   3535 	vnode_t *ndvp;
   3536 	vnode_t *srcvp, *targvp;
   3537 	struct vattr obdva, oidva, oadva;
   3538 	struct vattr nbdva, nidva, nadva;
   3539 	char *onm, *nnm;
   3540 	uint_t olen, nlen;
   3541 	rfs4_file_t *fp, *sfp;
   3542 	int in_crit_src, in_crit_targ;
   3543 	int fp_rele_grant_hold, sfp_rele_grant_hold;
   3544 	bslabel_t *clabel;
   3545 	caller_context_t ct;
   3546 
   3547 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
   3548 	    RENAME4args *, args);
   3549 
   3550 	fp = sfp = NULL;
   3551 	srcvp = targvp = NULL;
   3552 	in_crit_src = in_crit_targ = 0;
   3553 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
   3554 
   3555 	/* CURRENT_FH: target directory */
   3556 	ndvp = cs->vp;
   3557 	if (ndvp == NULL) {
   3558 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3559 		goto final;
   3560 	}
   3561 
   3562 	/* SAVED_FH: from directory */
   3563 	odvp = cs->saved_vp;
   3564 	if (odvp == NULL) {
   3565 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3566 		goto final;
   3567 	}
   3568 
   3569 	if (cs->access == CS_ACCESS_DENIED) {
   3570 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3571 		goto final;
   3572 	}
   3573 
   3574 	/*
   3575 	 * If there is an unshared filesystem mounted on this vnode,
   3576 	 * do not allow to rename objects in this directory.
   3577 	 */
   3578 	if (vn_ismntpt(odvp)) {
   3579 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3580 		goto final;
   3581 	}
   3582 
   3583 	/*
   3584 	 * If there is an unshared filesystem mounted on this vnode,
   3585 	 * do not allow to rename to this directory.
   3586 	 */
   3587 	if (vn_ismntpt(ndvp)) {
   3588 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3589 		goto final;
   3590 	}
   3591 
   3592 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
   3593 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   3594 		goto final;
   3595 	}
   3596 
   3597 	if (cs->saved_exi != cs->exi) {
   3598 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   3599 		goto final;
   3600 	}
   3601 
   3602 	if (!utf8_dir_verify(&args->oldname)) {
   3603 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3604 		goto final;
   3605 	}
   3606 
   3607 	if (!utf8_dir_verify(&args->newname)) {
   3608 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3609 		goto final;
   3610 	}
   3611 
   3612 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
   3613 	if (onm == NULL) {
   3614 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3615 		goto final;
   3616 	}
   3617 
   3618 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   3619 	if (nnm == NULL) {
   3620 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3621 		kmem_free(onm, olen);
   3622 		goto final;
   3623 	}
   3624 
   3625 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
   3626 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   3627 		kmem_free(onm, olen);
   3628 		kmem_free(nnm, nlen);
   3629 		goto final;
   3630 	}
   3631 
   3632 
   3633 	if (rdonly4(cs->exi, cs->vp, req)) {
   3634 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   3635 		kmem_free(onm, olen);
   3636 		kmem_free(nnm, nlen);
   3637 		goto final;
   3638 	}
   3639 
   3640 	/* check label of the target dir */
   3641 	if (is_system_labeled()) {
   3642 		ASSERT(req->rq_label != NULL);
   3643 		clabel = req->rq_label;
   3644 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
   3645 		    "got client label from request(1)",
   3646 		    struct svc_req *, req);
   3647 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3648 			if (!do_rfs_label_check(clabel, ndvp,
   3649 			    EQUALITY_CHECK, cs->exi)) {
   3650 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3651 				goto final;
   3652 			}
   3653 		}
   3654 	}
   3655 
   3656 	/*
   3657 	 * Is the source a file and have a delegation?
   3658 	 * We don't need to acquire va_seq before these lookups, if
   3659 	 * it causes an update, cinfo.before will not match, which will
   3660 	 * trigger a cache flush even if atomic is TRUE.
   3661 	 */
   3662 	sfp = mds_lookup_and_findfile(odvp, onm, &srcvp, &error, cs);
   3663 	if (srcvp != NULL) {
   3664 		if (rfs4_check_delegated(FWRITE, srcvp, TRUE, TRUE, TRUE,
   3665 		    NULL)) {
   3666 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   3667 			goto err_out;
   3668 		}
   3669 	} else {
   3670 		*cs->statusp = resp->status = puterrno4(error);
   3671 		kmem_free(onm, olen);
   3672 		kmem_free(nnm, nlen);
   3673 		goto final;
   3674 	}
   3675 
   3676 	sfp_rele_grant_hold = 1;
   3677 
   3678 	/* Does the destination exist and a file and have a delegation? */
   3679 	fp = mds_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs);
   3680 	if (targvp != NULL) {
   3681 		if (rfs4_check_delegated(FWRITE, targvp, TRUE, TRUE, TRUE,
   3682 		    NULL)) {
   3683 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   3684 			goto err_out;
   3685 		}
   3686 	}
   3687 
   3688 	fp_rele_grant_hold = 1;
   3689 
   3690 	/* Check for NBMAND lock on both source and target */
   3691 	if (nbl_need_check(srcvp)) {
   3692 		nbl_start_crit(srcvp, RW_READER);
   3693 		in_crit_src = 1;
   3694 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, &ct)) {
   3695 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   3696 			goto err_out;
   3697 		}
   3698 	}
   3699 
   3700 	if (targvp && nbl_need_check(targvp)) {
   3701 		nbl_start_crit(targvp, RW_READER);
   3702 		in_crit_targ = 1;
   3703 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, &ct)) {
   3704 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   3705 			goto err_out;
   3706 		}
   3707 	}
   3708 
   3709 	ct.cc_sysid = 0;
   3710 	ct.cc_pid = 0;
   3711 	ct.cc_caller_id = cs->instp->caller_id;
   3712 	ct.cc_flags = CC_DONTBLOCK;
   3713 
   3714 	/* Get source "before" change value */
   3715 	obdva.va_mask = AT_CTIME|AT_SEQ;
   3716 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, &ct);
   3717 	if (!error) {
   3718 		nbdva.va_mask = AT_CTIME|AT_SEQ;
   3719 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, &ct);
   3720 	}
   3721 	if (error) {
   3722 		*cs->statusp = resp->status = puterrno4(error);
   3723 		goto err_out;
   3724 	}
   3725 
   3726 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
   3727 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
   3728 
   3729 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr, &ct, 0)) ==
   3730 	    0 && fp != NULL) {
   3731 		struct vattr va;
   3732 		vnode_t *tvp;
   3733 
   3734 		rfs4_dbe_lock(fp->rf_dbe);
   3735 		tvp = fp->rf_vp;
   3736 		if (tvp)
   3737 			VN_HOLD(tvp);
   3738 		rfs4_dbe_unlock(fp->rf_dbe);
   3739 
   3740 		if (tvp) {
   3741 			va.va_mask = AT_NLINK;
   3742 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, &ct) &&
   3743 			    va.va_nlink == 0) {
   3744 				/* The file is gone and so should the state */
   3745 				if (in_crit_targ) {
   3746 					nbl_end_crit(targvp);
   3747 					in_crit_targ = 0;
   3748 				}
   3749 				rfs4_close_all_state(fp);
   3750 			}
   3751 			VN_RELE(tvp);
   3752 		}
   3753 	}
   3754 	if (error == 0) {
   3755 		char *tmp;
   3756 
   3757 		/* fix the path name for the renamed file */
   3758 		mutex_enter(&srcvp->v_lock);
   3759 		tmp = srcvp->v_path;
   3760 		srcvp->v_path = NULL;
   3761 		mutex_exit(&srcvp->v_lock);
   3762 		vn_setpath(rootdir, ndvp, srcvp, nnm, nlen - 1);
   3763 		if (tmp != NULL)
   3764 			kmem_free(tmp, strlen(tmp) + 1);
   3765 	}
   3766 
   3767 	if (in_crit_src)
   3768 		nbl_end_crit(srcvp);
   3769 	if (srcvp)
   3770 		VN_RELE(srcvp);
   3771 	if (in_crit_targ)
   3772 		nbl_end_crit(targvp);
   3773 	if (targvp)
   3774 		VN_RELE(targvp);
   3775 
   3776 	if (sfp) {
   3777 		rfs4_clear_dont_grant(cs->instp, sfp);
   3778 		rfs4_file_rele(sfp);
   3779 		sfp = NULL;
   3780 	}
   3781 	if (fp) {
   3782 		rfs4_clear_dont_grant(cs->instp, fp);
   3783 		rfs4_file_rele(fp);
   3784 		fp = NULL;
   3785 	}
   3786 
   3787 	kmem_free(onm, olen);
   3788 	kmem_free(nnm, nlen);
   3789 
   3790 	/*
   3791 	 * Get the initial "after" sequence number, if it fails, set to zero
   3792 	 */
   3793 	oidva.va_mask = AT_SEQ;
   3794 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, &ct))
   3795 		oidva.va_seq = 0;
   3796 
   3797 	nidva.va_mask = AT_SEQ;
   3798 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, &ct))
   3799 		nidva.va_seq = 0;
   3800 
   3801 	/*
   3802 	 * Force modified data and metadata out to stable storage.
   3803 	 */
   3804 	(void) VOP_FSYNC(odvp, 0, cs->cr, &ct);
   3805 	(void) VOP_FSYNC(ndvp, 0, cs->cr, &ct);
   3806 
   3807 	if (error) {
   3808 		*cs->statusp = resp->status = puterrno4(error);
   3809 		goto final;
   3810 	}
   3811 
   3812 	/*
   3813 	 * Get "after" change values, if it fails, simply return the
   3814 	 * before value.
   3815 	 */
   3816 	oadva.va_mask = AT_CTIME|AT_SEQ;
   3817 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, &ct)) {
   3818 		oadva.va_ctime = obdva.va_ctime;
   3819 		oadva.va_seq = 0;
   3820 	}
   3821 
   3822 	nadva.va_mask = AT_CTIME|AT_SEQ;
   3823 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, &ct)) {
   3824 		nadva.va_ctime = nbdva.va_ctime;
   3825 		nadva.va_seq = 0;
   3826 	}
   3827 
   3828 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
   3829 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
   3830 
   3831 	/*
   3832 	 * The cinfo.atomic = TRUE only if we have
   3833 	 * non-zero va_seq's, and it has incremented by exactly one
   3834 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
   3835 	 */
   3836 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
   3837 	    oidva.va_seq == (obdva.va_seq + 1) &&
   3838 	    oidva.va_seq == oadva.va_seq)
   3839 		resp->source_cinfo.atomic = TRUE;
   3840 	else
   3841 		resp->source_cinfo.atomic = FALSE;
   3842 
   3843 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
   3844 	    nidva.va_seq == (nbdva.va_seq + 1) &&
   3845 	    nidva.va_seq == nadva.va_seq)
   3846 		resp->target_cinfo.atomic = TRUE;
   3847 	else
   3848 		resp->target_cinfo.atomic = FALSE;
   3849 
   3850 	*cs->statusp = resp->status = NFS4_OK;
   3851 	goto final;
   3852 
   3853 err_out:
   3854 	kmem_free(onm, olen);
   3855 	kmem_free(nnm, nlen);
   3856 
   3857 	if (in_crit_src) nbl_end_crit(srcvp);
   3858 	if (in_crit_targ) nbl_end_crit(targvp);
   3859 	if (targvp) VN_RELE(targvp);
   3860 	if (srcvp) VN_RELE(srcvp);
   3861 	if (sfp) {
   3862 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(cs->instp, sfp);
   3863 		rfs4_file_rele(sfp);
   3864 	}
   3865 	if (fp) {
   3866 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(cs->instp, fp);
   3867 		rfs4_file_rele(fp);
   3868 	}
   3869 
   3870 final:
   3871 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
   3872 	    RENAME4res *, resp);
   3873 }
   3874 
   3875 
   3876 /* ARGSUSED */
   3877 static void
   3878 mds_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   3879 	compound_state_t *cs)
   3880 {
   3881 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
   3882 
   3883 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
   3884 
   3885 	/* No need to check cs->access - we are not accessing any object */
   3886 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
   3887 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
   3888 		goto final;
   3889 	}
   3890 	if (cs->vp != NULL) {
   3891 		VN_RELE(cs->vp);
   3892 	}
   3893 	cs->vp = cs->saved_vp;
   3894 	cs->saved_vp = NULL;
   3895 	cs->exi = cs->saved_exi;
   3896 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
   3897 	*cs->statusp = resp->status = NFS4_OK;
   3898 	cs->deleg = FALSE;
   3899 
   3900 final:
   3901 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
   3902 	    RESTOREFH4res *, resp);
   3903 }
   3904 
   3905 /* ARGSUSED */
   3906 static void
   3907 mds_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3908 	compound_state_t *cs)
   3909 {
   3910 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
   3911 
   3912 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
   3913 
   3914 	/* No need to check cs->access - we are not accessing any object */
   3915 	if (cs->vp == NULL) {
   3916 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3917 		goto final;
   3918 	}
   3919 	if (cs->saved_vp != NULL) {
   3920 		VN_RELE(cs->saved_vp);
   3921 	}
   3922 	cs->saved_vp = cs->vp;
   3923 	VN_HOLD(cs->saved_vp);
   3924 	cs->saved_exi = cs->exi;
   3925 	/*
   3926 	 * since SAVEFH is fairly rare, don't alloc space for its fh
   3927 	 * unless necessary.
   3928 	 */
   3929 	if (cs->saved_fh.nfs_fh4_val == NULL) {
   3930 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
   3931 	}
   3932 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
   3933 	*cs->statusp = resp->status = NFS4_OK;
   3934 
   3935 final:
   3936 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
   3937 	    SAVEFH4res *, resp);
   3938 }
   3939 
   3940 /* ARGSUSED */
   3941 static void
   3942 mds_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3943 	compound_state_t *cs)
   3944 {
   3945 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
   3946 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
   3947 	bslabel_t *clabel;
   3948 
   3949 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
   3950 	    SETATTR4args *, args);
   3951 
   3952 	if (cs->vp == NULL) {
   3953 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3954 		goto final;
   3955 	}
   3956 
   3957 	/*
   3958 	 * If there is an unshared filesystem mounted on this vnode,
   3959 	 * do not allow to setattr on this vnode.
   3960 	 */
   3961 	if (vn_ismntpt(cs->vp)) {
   3962 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3963 		goto final;
   3964 	}
   3965 
   3966 	resp->attrsset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   3967 
   3968 	if (rdonly4(cs->exi, cs->vp, req)) {
   3969 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   3970 		goto final;
   3971 	}
   3972 
   3973 	/* check label before setting attributes */
   3974 	if (is_system_labeled()) {
   3975 		ASSERT(req->rq_label != NULL);
   3976 		clabel = req->rq_label;
   3977 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
   3978 		    "got client label from request(1)",
   3979 		    struct svc_req *, req);
   3980 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3981 			if (!do_rfs_label_check(clabel, cs->vp,
   3982 			    EQUALITY_CHECK, cs->exi)) {
   3983 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3984 				goto final;
   3985 			}
   3986 		}
   3987 	}
   3988 
   3989 	*cs->statusp = resp->status =
   3990 	    mds_setattr(&resp->attrsset, &args->obj_attributes, cs,
   3991 	    &args->stateid);
   3992 
   3993 final:
   3994 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
   3995 	    SETATTR4res *, resp);
   3996 }
   3997 
   3998 /* ARGSUSED */
   3999 static void
   4000 mds_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4001 	compound_state_t *cs)
   4002 {
   4003 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
   4004 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
   4005 	nnode_io_flags_t nnioflags = NNODE_IO_FLAG_WRITE;
   4006 	int error;
   4007 	nnode_t *nn;
   4008 	u_offset_t rlimit;
   4009 	struct uio uio;
   4010 	struct iovec iov[NFS_MAX_IOVECS];
   4011 	struct iovec *iovp = iov;
   4012 	int iovcnt;
   4013 	int ioflag;
   4014 	cred_t *savecred, *cr;
   4015 	bool_t *deleg = &cs->deleg;
   4016 	nfsstat4 stat;
   4017 	caller_context_t ct;
   4018 
   4019 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
   4020 	    WRITE4args *, args);
   4021 
   4022 	nn = cs->nn;
   4023 	if (nn == NULL) {
   4024 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4025 		goto final;
   4026 	}
   4027 	/*
   4028 	 * cs->access is set in call_checkauth4 called in putfh code.  The
   4029 	 * current putfh code will not invoke these security functions on the
   4030 	 * DS codepath since it goes by the filehandle, not by nnodes per se.
   4031 	 */
   4032 	if (cs->access == CS_ACCESS_DENIED) {
   4033 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4034 		goto final;
   4035 	}
   4036 
   4037 	cr = cs->cr;
   4038 	if ((cs->vp != NULL) && (rdonly4(cs->exi, cs->vp, req))) {
   4039 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   4040 		goto final;
   4041 	}
   4042 
   4043 	if ((stat = nnop_check_stateid(nn, cs, FWRITE, &args->stateid, FALSE,
   4044 	    deleg, TRUE, &ct, NULL)) != NFS4_OK) {
   4045 		*cs->statusp = resp->status = stat;
   4046 		goto out;
   4047 	}
   4048 
   4049 	error = nnop_io_prep(nn, &nnioflags, cr, &ct, args->offset,
   4050 	    args->data_len, NULL);
   4051 	if (error != 0)
   4052 		goto err;
   4053 
   4054 	if (args->data_len == 0) {
   4055 		*cs->statusp = resp->status = NFS4_OK;
   4056 		resp->count = 0;
   4057 		resp->committed = args->stable;
   4058 		resp->writeverf = cs->instp->Write4verf;
   4059 		goto out;
   4060 	}
   4061 
   4062 	if (args->mblk != NULL) {
   4063 		mblk_t *m;
   4064 		uint_t bytes, round_len;
   4065 
   4066 		iovcnt = 0;
   4067 		bytes = 0;
   4068 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
   4069 		for (m = args->mblk;
   4070 		    m != NULL && bytes < round_len;
   4071 		    m = m->b_cont) {
   4072 			iovcnt++;
   4073 			bytes += MBLKL(m);
   4074 		}
   4075 #ifdef DEBUG
   4076 		/* should have ended on an mblk boundary */
   4077 		if (bytes != round_len) {
   4078 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
   4079 			    bytes, round_len, args->data_len);
   4080 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
   4081 			    (void *)args->mblk, (void *)m);
   4082 			ASSERT(bytes == round_len);
   4083 		}
   4084 #endif
   4085 		if (iovcnt <= NFS_MAX_IOVECS) {
   4086 			iovp = iov;
   4087 		} else {
   4088 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
   4089 		}
   4090 		mblk_to_iov(args->mblk, iovcnt, iovp);
   4091 	} else {
   4092 		iovcnt = 1;
   4093 		iovp = iov;
   4094 		iovp->iov_base = args->data_val;
   4095 		iovp->iov_len = args->data_len;
   4096 	}
   4097 
   4098 	uio.uio_iov = iovp;
   4099 	uio.uio_iovcnt = iovcnt;
   4100 
   4101 	uio.uio_segflg = UIO_SYSSPACE;
   4102 	uio.uio_extflg = UIO_COPY_DEFAULT;
   4103 	uio.uio_loffset = args->offset;
   4104 	uio.uio_resid = args->data_len;
   4105 	uio.uio_llimit = curproc->p_fsz_ctl;
   4106 	rlimit = uio.uio_llimit - args->offset;
   4107 	if (rlimit < (u_offset_t)uio.uio_resid)
   4108 		uio.uio_resid = (int)rlimit;
   4109 
   4110 	if (args->stable == UNSTABLE4)
   4111 		ioflag = 0;
   4112 	else if (args->stable == FILE_SYNC4)
   4113 		ioflag = FSYNC;
   4114 	else if (args->stable == DATA_SYNC4)
   4115 		ioflag = FDSYNC;
   4116 	else {
   4117 		if (iovp != iov)
   4118 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
   4119 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4120 		goto out;
   4121 	}
   4122 
   4123 	/*
   4124 	 * We're changing creds because VM may fault and we need
   4125 	 * the cred of the current thread to be used if quota
   4126 	 * checking is enabled.
   4127 	 */
   4128 	savecred = curthread->t_cred;
   4129 	curthread->t_cred = cr;
   4130 	error = nnop_write(nn, &nnioflags, &uio, ioflag, cr, &ct, NULL);
   4131 	curthread->t_cred = savecred;
   4132 
   4133 	if (iovp != iov)
   4134 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
   4135 
   4136 err:
   4137 	if (error) {
   4138 		*cs->statusp = resp->status = nnode_stat4(error, 1);
   4139 		goto out;
   4140 	}
   4141 
   4142 	*cs->statusp = resp->status = NFS4_OK;
   4143 	resp->count = args->data_len - uio.uio_resid;
   4144 
   4145 	if (ioflag == 0)
   4146 		resp->committed = UNSTABLE4;
   4147 	else
   4148 		resp->committed = FILE_SYNC4;
   4149 
   4150 	resp->writeverf = cs->instp->Write4verf;
   4151 
   4152 	nnop_update(nn, nnioflags, cr, &ct, args->offset + resp->count);
   4153 out:
   4154 	nnop_io_release(nn, nnioflags, &ct);
   4155 
   4156 final:
   4157 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
   4158 	    WRITE4res *, resp);
   4159 }
   4160 
   4161 static void
   4162 rfs41_op_dispatch(compound_state_t *cs,
   4163     COMPOUND4args *args, COMPOUND4res *resp, struct svc_req *req)
   4164 {
   4165 	nfs_argop4		*argop;
   4166 	nfs_resop4		*resop;
   4167 	uint_t			 op;
   4168 
   4169 	argop = &args->array[cs->op_ndx];
   4170 	resop = &resp->array[cs->op_ndx];
   4171 
   4172 	op = (uint_t)argop->argop;
   4173 	resop->resop = op;
   4174 
   4175 	if (op >= OP_ILLEGAL_IDX) {
   4176 		/*
   4177 		 * This is effectively dead code since XDR code
   4178 		 * will have already returned BADXDR if op doesn't
   4179 		 * decode to legal value.  This only done for a
   4180 		 * day when XDR code doesn't verify v4 opcodes.
   4181 		 * or some bozo didn't update the operation dispatch
   4182 		 * table.
   4183 		 */
   4184 		rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
   4185 
   4186 		mds_op_illegal(argop, resop, req, cs);
   4187 		DTRACE_PROBE(nfss41__e__operation_tilt);
   4188 		goto bail;
   4189 	}
   4190 
   4191 	/*
   4192 	 * First if this is a bad operation stop
   4193 	 * the compound processing right now!
   4194 	 */
   4195 	if (mds_disptab[op].op_flag == DISP_OP_BAD) {
   4196 		mds_op_illegal(argop, resop, req, cs);
   4197 		DTRACE_PROBE1(nfss41__e__disp_op_inval, int, op);
   4198 		goto bail;
   4199 	}
   4200 
   4201 	if (seq_chk_limits(argop, resop, cs)) {
   4202 		DTRACE_PROBE2(nfss41__i__scl_error,
   4203 		    char *, nfs4_op_to_str(op),
   4204 		    char *, nfs41_strerror(*cs->statusp));
   4205 	} else {
   4206 		(*mds_disptab[op].dis_op)(argop, resop, req, cs);
   4207 	}
   4208 
   4209 bail:
   4210 	if (*cs->statusp != NFS4_OK)
   4211 		cs->cont = FALSE;
   4212 
   4213 	/*
   4214 	 * If not at last op, and if we are to stop, then
   4215 	 * compact the results array.
   4216 	 */
   4217 	if ((cs->op_ndx + 1) < cs->op_len && !cs->cont) {
   4218 		nfs_resop4 *new_res = kmem_alloc(
   4219 		    (cs->op_ndx+1) * sizeof (nfs_resop4), KM_SLEEP);
   4220 		bcopy(resp->array,
   4221 		    new_res, (cs->op_ndx+1) * sizeof (nfs_resop4));
   4222 		kmem_free(resp->array,
   4223 		    cs->op_len * sizeof (nfs_resop4));
   4224 
   4225 		resp->array_len =  cs->op_ndx + 1;
   4226 		resp->array = new_res;
   4227 	}
   4228 }
   4229 
   4230 void
   4231 rfs41_err_resp(COMPOUND4args *args, COMPOUND4res *resp, nfsstat4 err)
   4232 {
   4233 	size_t	sz;
   4234 
   4235 	resp->array_len = 1;
   4236 	sz = resp->array_len * sizeof (nfs_resop4);
   4237 	resp->array = kmem_zalloc(sz, KM_SLEEP);
   4238 
   4239 	resp->array[0].resop = args->array[0].argop;
   4240 	resp->array[0].nfs_resop4_u.opillegal.status = err;
   4241 }
   4242 
   4243 
   4244 /* ARGSUSED */
   4245 void
   4246 mds_compound(compound_state_t *cs,
   4247     COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
   4248     struct svc_req *req, int *rv)
   4249 {
   4250 	cred_t *cr;
   4251 	size_t	reslen;
   4252 
   4253 	if (rv != NULL)
   4254 		*rv = 0;
   4255 	/*
   4256 	 * Form a reply tag by copying over the reqeuest tag.
   4257 	 */
   4258 	resp->tag.utf8string_val =
   4259 	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
   4260 
   4261 	resp->tag.utf8string_len = args->tag.utf8string_len;
   4262 
   4263 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
   4264 	    resp->tag.utf8string_len);
   4265 
   4266 	ASSERT(exi == NULL);
   4267 
   4268 	cr = crget();
   4269 	ASSERT(cr != NULL);
   4270 
   4271 	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
   4272 
   4273 		DTRACE_NFSV4_2(compound__start,
   4274 		    struct compound_state *,
   4275 		    &cs, COMPOUND4args *, args);
   4276 
   4277 		crfree(cr);
   4278 
   4279 		DTRACE_NFSV4_2(compound__done,
   4280 		    struct compound_state *,
   4281 		    &cs, COMPOUND4res *, resp);
   4282 
   4283 		svcerr_badcred(req->rq_xprt);
   4284 		if (rv != NULL)
   4285 			*rv = 1;
   4286 		return;
   4287 	}
   4288 	if (cs->basecr != NULL)
   4289 		crfree(cs->basecr);
   4290 	cs->basecr = cr;
   4291 	cs->req = req;
   4292 
   4293 	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
   4294 	    COMPOUND4args *, args);
   4295 
   4296 	/*
   4297 	 * For now, NFS4 compound processing must be protected by
   4298 	 * exported_lock because it can access more than one exportinfo
   4299 	 * per compound and share/unshare can now change multiple
   4300 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
   4301 	 * per proc (excluding public exinfo), and exi_count design
   4302 	 * is sufficient to protect concurrent execution of NFS2/3
   4303 	 * ops along with unexport.
   4304 	 */
   4305 	rw_enter(&exported_lock, RW_READER);
   4306 
   4307 	/*
   4308 	 * If this is the first compound we've seen, we need to start
   4309 	 * the instances' grace period.
   4310 	 */
   4311 	if (cs->instp->seen_first_compound == 0) {
   4312 		rfs4_grace_start_new(cs->instp);
   4313 		cs->instp->seen_first_compound = 1;
   4314 	}
   4315 
   4316 	/*
   4317 	 * Any operations _other_ than the ones listed below, should _not_
   4318 	 * appear as the first operation in a compound. If so we will
   4319 	 * error out. We use the opilleagal.status without regard to
   4320 	 * the actual operation since we know that status always appears
   4321 	 * as the first element for all the operations.
   4322 	 */
   4323 	switch (args->array[0].argop) {
   4324 	case OP_SEQUENCE:
   4325 	case OP_EXCHANGE_ID:
   4326 	case OP_CREATE_SESSION:
   4327 	case OP_DESTROY_SESSION:
   4328 		break;
   4329 
   4330 	case OP_BIND_CONN_TO_SESSION:
   4331 		/*
   4332 		 * Should be the _only_ op in compound
   4333 		 */
   4334 		if (args->array_len != 1) {
   4335 			*cs->statusp = NFS4ERR_NOT_ONLY_OP;
   4336 			rfs41_err_resp(args, resp, *cs->statusp);
   4337 			goto out;
   4338 		}
   4339 		break;
   4340 
   4341 	default:
   4342 		*cs->statusp = NFS4ERR_OP_NOT_IN_SESSION;
   4343 		rfs41_err_resp(args, resp, *cs->statusp);
   4344 		goto out;
   4345 	}
   4346 
   4347 	/*
   4348 	 * Everything kosher; allocate results array
   4349 	 */
   4350 	reslen = cs->op_len = resp->array_len = args->array_len;
   4351 	resp->array = kmem_zalloc(reslen * sizeof (nfs_resop4), KM_SLEEP);
   4352 
   4353 	/*
   4354 	 * Iterate over the compound until we have exhausted the operations
   4355 	 * or the compound state indicates that we should terminate.
   4356 	 */
   4357 	for (cs->op_ndx = 0;
   4358 	    cs->op_ndx < cs->op_len && cs->cont == TRUE; cs->op_ndx++)
   4359 		rfs41_op_dispatch(cs, args, resp, req);
   4360 
   4361 out:
   4362 	rw_exit(&exported_lock);
   4363 
   4364 	/*
   4365 	 * done with this compound request, free the label
   4366 	 */
   4367 	if (req->rq_label != NULL) {
   4368 		kmem_free(req->rq_label, sizeof (bslabel_t));
   4369 		req->rq_label = NULL;
   4370 	}
   4371 
   4372 	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
   4373 	    COMPOUND4res *, resp);
   4374 }
   4375 
   4376 void
   4377 rfs41_compound_free(COMPOUND4res *resp, compound_state_t *cs)
   4378 {
   4379 	uint_t i;
   4380 
   4381 	if (resp->tag.utf8string_val) {
   4382 		UTF8STRING_FREE(resp->tag)
   4383 	}
   4384 
   4385 	for (i = 0; i < resp->array_len; i++) {
   4386 		nfs_resop4 *resop;
   4387 		uint_t op;
   4388 
   4389 		resop = &resp->array[i];
   4390 		op = (uint_t)resop->resop;
   4391 		if (op < OP_ILLEGAL_IDX) {
   4392 			(*mds_disptab[op].dis_resfree)(resop, cs);
   4393 		}
   4394 	}
   4395 
   4396 	if (resp->array != NULL) {
   4397 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
   4398 		resp->array = NULL;
   4399 		resp->array_len = 0;
   4400 	}
   4401 }
   4402 
   4403 delegreq_t
   4404 do_41_deleg_hack(int osa)
   4405 {
   4406 	int want_deleg;
   4407 
   4408 	want_deleg = (osa & OPEN4_SHARE_ACCESS_WANT_DELEG_MASK);
   4409 
   4410 	switch (want_deleg) {
   4411 	case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
   4412 		return (DELEG_READ);
   4413 
   4414 	case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
   4415 		return (DELEG_WRITE);
   4416 
   4417 	case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
   4418 		return (DELEG_ANY);
   4419 
   4420 	case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
   4421 		return (DELEG_NONE);
   4422 	}
   4423 	return (DELEG_ANY);
   4424 }
   4425 
   4426 /*
   4427  * XXX: This will go away with the SMF work for npools.
   4428  */
   4429 extern mds_layout_t *mds_gen_default_layout(nfs_server_instance_t *);
   4430 
   4431 /*
   4432  * We are going to create the file, so we need to get
   4433  * a layout in play for it.
   4434  */
   4435 static nfsstat4
   4436 mds_createfile_get_layout(struct svc_req *req, vnode_t *vp,
   4437     struct compound_state *cs, caller_context_t *ct, mds_layout_t **plo)
   4438 {
   4439 	vattr_t		spe_va;
   4440 
   4441 	int		i;
   4442 
   4443 	layout_core_t	lc;
   4444 
   4445 	int		error;
   4446 	struct netbuf	*claddr;
   4447 
   4448 	nfsstat4	status = NFS4_OK;
   4449 
   4450 	spe_va.va_mask = AT_GID|AT_UID;
   4451 	error = VOP_GETATTR(vp, &spe_va, 0, cs->cr, ct);
   4452 	if (error)
   4453 		return (puterrno4(error));
   4454 
   4455 	/*
   4456 	 * Taken from nfsauth_cache_get():
   4457 	 */
   4458 	claddr = svc_getrpccaller(req->rq_xprt);
   4459 
   4460 	lc.lc_mds_sids = NULL;
   4461 
   4462 	/*
   4463 	 * XXX: We may not be able to trust vp->v_path,
   4464 	 * but if it is filled in, we will use it. Otherwise
   4465 	 * we will evaluate polices ignoring the path components.
   4466 	 */
   4467 	error = nfs41_spe_allocate(&spe_va, claddr,
   4468 	    vp->v_path, &lc, TRUE);
   4469 	if (error) {
   4470 		/*
   4471 		 * XXX: Until we get the SMF code
   4472 		 * in place, we handle all errors by
   4473 		 * using the default layout of the
   4474 		 * old prototype code
   4475 		 *
   4476 		 * At that point, we should return the
   4477 		 * given error.
   4478 		 */
   4479 		*plo = mds_gen_default_layout(cs->instp);
   4480 		if (*plo == NULL) {
   4481 			status = NFS4ERR_LAYOUTUNAVAILABLE;
   4482 		} else {
   4483 			/*
   4484 			 * Record the layout, don't get
   4485 			 * bent out of shape if it fails,
   4486 			 * we'll try again at checkstate time.
   4487 			 */
   4488 			(void) mds_put_layout(*plo, vp);
   4489 		}
   4490 
   4491 		return (status);
   4492 	}
   4493 
   4494 	*plo = mds_add_layout(&lc);
   4495 
   4496 	if (lc.lc_mds_sids) {
   4497 		for (i = 0; i < lc.lc_stripe_count; i++) {
   4498 			kmem_free(lc.lc_mds_sids[i].val,
   4499 			    lc.lc_mds_sids[i].len);
   4500 		}
   4501 
   4502 		kmem_free(lc.lc_mds_sids,
   4503 		    lc.lc_stripe_count * sizeof (mds_sid));
   4504 	}
   4505 
   4506 	if (*plo == NULL) {
   4507 		status = NFS4ERR_LAYOUTUNAVAILABLE;
   4508 	} else {
   4509 		/*
   4510 		 * Record the layout, don't get bent out of shape
   4511 		 * if it fails, we'll try again at checkstate time.
   4512 		 */
   4513 		(void) mds_put_layout(*plo, vp);
   4514 	}
   4515 
   4516 	return (status);
   4517 }
   4518 
   4519 /*
   4520  * If we call the spe in here, we return the new layout in *plo.
   4521  */
   4522 static nfsstat4
   4523 mds_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
   4524     change_info4 *cinfo, attrmap4 *attrset, mds_layout_t **plo)
   4525 {
   4526 	struct nfs4_svgetit_arg sarg;
   4527 	struct nfs4_ntov_table ntov;
   4528 
   4529 	bool_t ntov_table_init = FALSE;
   4530 	struct statvfs64 sb;
   4531 	nfsstat4	status = NFS4_OK;
   4532 	vnode_t *vp;
   4533 	vattr_t bva, ava, iva, cva, *vap;
   4534 	vnode_t *dvp;
   4535 	timespec32_t *mtime;
   4536 	char *nm = NULL;
   4537 	uint_t buflen;
   4538 	bool_t created;
   4539 	bool_t setsize = FALSE;
   4540 	len_t reqsize;
   4541 	int error;
   4542 	bool_t trunc;
   4543 	caller_context_t ct;
   4544 	component4 *component;
   4545 	bslabel_t *clabel;
   4546 	attrvers_t avers;
   4547 
   4548 	avers = RFS4_ATTRVERS(cs);
   4549 	sarg.sbp = &sb;
   4550 	dvp = cs->vp;
   4551 
   4552 	/* Check if the file system is read only */
   4553 	if (rdonly4(cs->exi, dvp, req))
   4554 		return (NFS4ERR_ROFS);
   4555 
   4556 	/* check the label of including directory */
   4557 	if (is_system_labeled()) {
   4558 		ASSERT(req->rq_label != NULL);
   4559 		clabel = req->rq_label;
   4560 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   4561 		    "got client label from request(1)",
   4562 		    struct svc_req *, req);
   4563 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   4564 			if (!do_rfs_label_check(clabel, dvp,
   4565 			    EQUALITY_CHECK, cs->exi)) {
   4566 				return (NFS4ERR_ACCESS);
   4567 			}
   4568 		}
   4569 	}
   4570 
   4571 	/*
   4572 	 * Get the last component of path name in nm. cs will reference
   4573 	 * the including directory on success.
   4574 	 */
   4575 	component = &args->open_claim4_u.file;
   4576 	if (!utf8_dir_verify(component))
   4577 		return (NFS4ERR_INVAL);
   4578 
   4579 	nm = utf8_to_fn(component, &buflen, NULL);
   4580 
   4581 	if (nm == NULL)
   4582 		return (NFS4ERR_RESOURCE);
   4583 
   4584 	if (buflen > MAXNAMELEN) {
   4585 		kmem_free(nm, buflen);
   4586 		return (NFS4ERR_NAMETOOLONG);
   4587 	}
   4588 
   4589 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
   4590 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   4591 	if (error) {
   4592 		kmem_free(nm, buflen);
   4593 		return (puterrno4(error));
   4594 	}
   4595 
   4596 	if (bva.va_type != VDIR) {
   4597 		kmem_free(nm, buflen);
   4598 		return (NFS4ERR_NOTDIR);
   4599 	}
   4600 
   4601 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
   4602 
   4603 	switch (args->mode) {
   4604 	case GUARDED4:
   4605 		/*FALLTHROUGH*/
   4606 	case UNCHECKED4:
   4607 		nfs4_ntov_table_init(&ntov, avers);
   4608 		ntov_table_init = TRUE;
   4609 
   4610 		*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4611 		status = do_rfs4_set_attrs(attrset,
   4612 		    &args->createhow4_u.createattrs,
   4613 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
   4614 
   4615 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
   4616 		    sarg.vap->va_type != VREG) {
   4617 			if (sarg.vap->va_type == VDIR)
   4618 				status = NFS4ERR_ISDIR;
   4619 			else if (sarg.vap->va_type == VLNK)
   4620 				status = NFS4ERR_SYMLINK;
   4621 			else
   4622 				status = NFS4ERR_INVAL;
   4623 		}
   4624 
   4625 		if (status != NFS4_OK) {
   4626 			kmem_free(nm, buflen);
   4627 			nfs4_ntov_table_free(&ntov, &sarg);
   4628 			*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4629 			return (status);
   4630 		}
   4631 
   4632 		vap = sarg.vap;
   4633 		vap->va_type = VREG;
   4634 		vap->va_mask |= AT_TYPE;
   4635 
   4636 		if ((vap->va_mask & AT_MODE) == 0) {
   4637 			vap->va_mask |= AT_MODE;
   4638 			vap->va_mode = (mode_t)0600;
   4639 		}
   4640 
   4641 		if (vap->va_mask & AT_SIZE) {
   4642 
   4643 			/* Disallow create with a non-zero size */
   4644 
   4645 			if ((reqsize = sarg.vap->va_size) != 0) {
   4646 				kmem_free(nm, buflen);
   4647 				nfs4_ntov_table_free(&ntov, &sarg);
   4648 				*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4649 				return (NFS4ERR_INVAL);
   4650 			}
   4651 			setsize = TRUE;
   4652 		}
   4653 		break;
   4654 
   4655 	case EXCLUSIVE4:
   4656 		/* prohibit EXCL create of named attributes */
   4657 		if (dvp->v_flag & V_XATTRDIR) {
   4658 			kmem_free(nm, buflen);
   4659 			*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4660 			return (NFS4ERR_INVAL);
   4661 		}
   4662 
   4663 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
   4664 		cva.va_type = VREG;
   4665 		/*
   4666 		 * Ensure no time overflows. Assumes underlying
   4667 		 * filesystem supports at least 32 bits.
   4668 		 * Truncate nsec to usec resolution to allow valid
   4669 		 * compares even if the underlying filesystem truncates.
   4670 		 */
   4671 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
   4672 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
   4673 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
   4674 		cva.va_mode = (mode_t)0;
   4675 		vap = &cva;
   4676 		break;
   4677 
   4678 	case EXCLUSIVE4_1:
   4679 		kmem_free(nm, buflen);
   4680 		*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4681 		return (NFS4ERR_INVAL);
   4682 	}
   4683 
   4684 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
   4685 	    cs->cr, &vp, &created);
   4686 	kmem_free(nm, buflen);
   4687 
   4688 	if (status != NFS4_OK) {
   4689 		if (ntov_table_init)
   4690 			nfs4_ntov_table_free(&ntov, &sarg);
   4691 		*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4692 		return (status);
   4693 	}
   4694 
   4695 	trunc = (setsize && !created);
   4696 
   4697 	if (args->mode != EXCLUSIVE4) {
   4698 		attrmap4 createmask = args->createhow4_u.createattrs.attrmask;
   4699 
   4700 		/*
   4701 		 * True verification that object was created with correct
   4702 		 * attrs is impossible.  The attrs could have been changed
   4703 		 * immediately after object creation.  If attributes did
   4704 		 * not verify, the only recourse for the server is to
   4705 		 * destroy the object.  Maybe if some attrs (like gid)
   4706 		 * are set incorrectly, the object should be destroyed;
   4707 		 * however, seems bad as a default policy.  Do we really
   4708 		 * want to destroy an object over one of the times not
   4709 		 * verifying correctly?  For these reasons, the server
   4710 		 * currently sets bits in attrset for createattrs
   4711 		 * that were set; however, no verification is done.
   4712 		 *
   4713 		 * vmask_to_nmask accounts for vattr bits set on create
   4714 		 *	[do_rfs4_set_attrs() only sets resp bits for
   4715 		 *	 non-vattr/vfs bits.]
   4716 		 * Mask off any bits we set by default so as not to return
   4717 		 * more attrset bits than were requested in createattrs
   4718 		 */
   4719 		if (created) {
   4720 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset, avers);
   4721 			ATTRMAP_MASK(*attrset, createmask);
   4722 		} else {
   4723 			/*
   4724 			 * We did not create the vnode (we tried but it
   4725 			 * already existed).  In this case, the only createattr
   4726 			 * that the spec allows the server to set is size,
   4727 			 * and even then, it can only be set if it is 0.
   4728 			 */
   4729 			*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4730 			if (trunc)
   4731 				ATTR_SET(*attrset, SIZE);
   4732 		}
   4733 	}
   4734 	if (ntov_table_init)
   4735 		nfs4_ntov_table_free(&ntov, &sarg);
   4736 
   4737 	/*
   4738 	 * Get the initial "after" sequence number, if it fails,
   4739 	 * set to zero, time to before.
   4740 	 */
   4741 	iva.va_mask = AT_CTIME|AT_SEQ;
   4742 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
   4743 		iva.va_seq = 0;
   4744 		iva.va_ctime = bva.va_ctime;
   4745 	}
   4746 
   4747 	/*
   4748 	 * create_vnode attempts to create the file exclusive,
   4749 	 * if it already exists the VOP_CREATE will fail and
   4750 	 * may not increase va_seq. It is atomic if
   4751 	 * we haven't changed the directory, but if it has changed
   4752 	 * we don't know what changed it.
   4753 	 */
   4754 	if (!created) {
   4755 		if (bva.va_seq && iva.va_seq &&
   4756 		    bva.va_seq == iva.va_seq)
   4757 			cinfo->atomic = TRUE;
   4758 		else
   4759 			cinfo->atomic = FALSE;
   4760 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
   4761 	} else {
   4762 		/*
   4763 		 * The entry was created, we need to sync the
   4764 		 * directory metadata.
   4765 		 */
   4766 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   4767 
   4768 		/*
   4769 		 * Get "after" change value, if it fails, simply return the
   4770 		 * before value.
   4771 		 */
   4772 		ava.va_mask = AT_CTIME|AT_SEQ;
   4773 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   4774 			ava.va_ctime = bva.va_ctime;
   4775 			ava.va_seq = 0;
   4776 		}
   4777 
   4778 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   4779 
   4780 		/*
   4781 		 * The cinfo->atomic = TRUE only if we have
   4782 		 * non-zero va_seq's, and it has incremented by exactly one
   4783 		 * during the create_vnode and it didn't
   4784 		 * change during the VOP_FSYNC.
   4785 		 */
   4786 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
   4787 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
   4788 			cinfo->atomic = TRUE;
   4789 		else
   4790 			cinfo->atomic = FALSE;
   4791 	}
   4792 
   4793 	/* Check for mandatory locking and that the size gets set. */
   4794 	cva.va_mask = AT_MODE;
   4795 	if (setsize)
   4796 		cva.va_mask |= AT_SIZE;
   4797 
   4798 	/* Assume the worst */
   4799 	cs->mandlock = TRUE;
   4800 
   4801 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
   4802 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
   4803 
   4804 		/*
   4805 		 * Truncate the file if necessary; this would be
   4806 		 * the case for create over an existing file.
   4807 		 */
   4808 
   4809 		if (trunc) {
   4810 			int in_crit = 0;
   4811 			rfs4_file_t *fp;
   4812 			bool_t create = FALSE;
   4813 
   4814 			/*
   4815 			 * We are writing over an existing file.
   4816 			 * Check to see if we need to recall a delegation.
   4817 			 */
   4818 			rfs4_hold_deleg_policy(cs->instp);
   4819 			if ((fp = rfs4_findfile(cs->instp, vp, NULL, &create))
   4820 			    != NULL) {
   4821 				if (rfs4_check_delegated_byfp(cs->instp,
   4822 				    FWRITE, fp, (reqsize == 0), FALSE, FALSE,
   4823 				    &cs->cp->rc_clientid)) {
   4824 
   4825 					rfs4_file_rele(fp);
   4826 					rfs4_rele_deleg_policy(cs->instp);
   4827 					VN_RELE(vp);
   4828 					*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4829 					return (NFS4ERR_DELAY);
   4830 				}
   4831 				rfs4_file_rele(fp);
   4832 			}
   4833 			rfs4_rele_deleg_policy(cs->instp);
   4834 
   4835 			if (nbl_need_check(vp)) {
   4836 				in_crit = 1;
   4837 
   4838 				ASSERT(reqsize == 0);
   4839 
   4840 				nbl_start_crit(vp, RW_READER);
   4841 				if (nbl_conflict(vp, NBL_WRITE, 0,
   4842 				    cva.va_size, 0, NULL)) {
   4843 					in_crit = 0;
   4844 					nbl_end_crit(vp);
   4845 					VN_RELE(vp);
   4846 					*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4847 					return (NFS4ERR_ACCESS);
   4848 				}
   4849 			}
   4850 			ct.cc_sysid = 0;
   4851 			ct.cc_pid = 0;
   4852 			ct.cc_caller_id = cs->instp->caller_id;
   4853 
   4854 			cva.va_mask = AT_SIZE;
   4855 			cva.va_size = reqsize;
   4856 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
   4857 			if (in_crit)
   4858 				nbl_end_crit(vp);
   4859 		}
   4860 	}
   4861 
   4862 	error = mknfs41_fh(&cs->fh, vp, cs->exi);
   4863 	/*
   4864 	 * Force modified data and metadata out to stable storage.
   4865 	 */
   4866 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
   4867 
   4868 	if (error) {
   4869 		VN_RELE(vp);
   4870 		*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4871 		return (puterrno4(error));
   4872 	}
   4873 
   4874 	/* if parent dir is attrdir, set namedattr fh flag */
   4875 	if (dvp->v_flag & V_XATTRDIR)
   4876 		FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val,
   4877 		    FH41_NAMEDATTR);
   4878 
   4879 	if (cs->vp)
   4880 		VN_RELE(cs->vp);
   4881 
   4882 	cs->vp = vp;
   4883 
   4884 	/*
   4885 	 * if we did not create the file, we will need to check
   4886 	 * the access bits on the file
   4887 	 */
   4888 
   4889 	if (!created) {
   4890 		if (setsize)
   4891 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
   4892 		status = check_open_access(args->share_access, cs, req);
   4893 		if (status != NFS4_OK)
   4894 			*attrset = NFS4_EMPTY_ATTRMAP(avers);
   4895 	} else {
   4896 		status = mds_createfile_get_layout(req, vp, cs, &ct, plo);
   4897 
   4898 		/*
   4899 		 * Allow mds_createfile_get_layout() to be verbose
   4900 		 * in what it presents as a status, but be aware
   4901 		 * that it is permissible to not generate a
   4902 		 * layout.
   4903 		 */
   4904 		if (status == NFS4ERR_LAYOUTUNAVAILABLE) {
   4905 			status = NFS4_OK;
   4906 		}
   4907 	}
   4908 
   4909 	return (status);
   4910 }
   4911 
   4912 /*
   4913  * 1) CB RACE           <kill stored rs record>		[done]
   4914  * 2) slot reuse	<kill stored rs record>		[done]
   4915  * 3) CB_RECALL		<(typical/normal case) use new sessid in deleg_state
   4916  *			 to find session originally granted the delegation to
   4917  *			 issue recall over _that_ session's back channel>
   4918  *			XXX - <<< check spec >>>
   4919  */
   4920 void
   4921 rfs41_rs_record(struct compound_state *cs, stateid_type_t type, void *p)
   4922 {
   4923 	rfs4_deleg_state_t	*dsp;
   4924 	slot_ent_t		*slotent;
   4925 
   4926 #ifdef	DEBUG_VERBOSE
   4927 	/*
   4928 	 * XXX - Do not change this to a static D probe;
   4929 	 *	 this is not intended for production !!!
   4930 	 */
   4931 	ulong_t			 offset;
   4932 	char			*who;
   4933 	who = modgetsymname((uintptr_t)caller(), &offset);
   4934 #endif	/* DEBUG_VERBOSE */
   4935 
   4936 	switch (type) {
   4937 	case DELEGID:			/* sessid/slot/seqid + rsid */
   4938 		ASSERT(cs != NULL && cs->sp != NULL);
   4939 
   4940 		dsp = (rfs4_deleg_state_t *)p;
   4941 		ASSERT(dsp != NULL);
   4942 #ifdef	DEBUG_VERBOSE
   4943 		cmn_err(CE_NOTE, "rfs41_rs_record: (%s, dsp = 0x%p)", who, dsp);
   4944 #endif	/* DEBUG_VERBOSE */
   4945 
   4946 		/* delegation state id stored in rfs4_deleg_state_t */
   4947 		bcopy(cs->sp->sn_sessid, dsp->rds_rs.sessid,
   4948 		    sizeof (sessionid4));
   4949 		dsp->rds_rs.seqid = cs->seqid;
   4950 		dsp->rds_rs.slotno = cs->slotno;
   4951 		rfs41_deleg_rs_hold(dsp);
   4952 
   4953 		/* add it to slrc slot to track slot-reuse case */
   4954 		slotent = slrc_slot_get(cs->sp->sn_replay, cs->slotno);
   4955 		ASSERT(slotent != NULL);
   4956 		ASSERT(slotent->se_p == NULL);
   4957 		mutex_enter(&slotent->se_lock);
   4958 		slotent->se_p = (rfs4_deleg_state_t *)dsp;
   4959 		mutex_exit(&slotent->se_lock);
   4960 
   4961 		rfs4_dbe_hold(dsp->rds_dbe);	/* added ref to deleg_state */
   4962 		break;
   4963 
   4964 	case LAYOUTID:
   4965 		/*
   4966 		 * Layout stateid race detection will be done
   4967 		 * using the stateid's embedded seqid field.
   4968 		 */
   4969 		/* FALLTHROUGH */
   4970 	default:
   4971 		break;
   4972 	}
   4973 }
   4974 
   4975 void
   4976 rfs41_rs_erase(void *p)
   4977 {
   4978 	rfs4_deleg_state_t	*dsp = (rfs4_deleg_state_t *)p;
   4979 #ifdef	DEBUG_VERBOSE
   4980 	/*
   4981 	 * XXX - Do not change this to a static D probe;
   4982 	 *	 this is not intended for production !!!
   4983 	 */
   4984 	ulong_t			 offset;
   4985 	char			*who;
   4986 	who = modgetsymname((uintptr_t)caller(), &offset);
   4987 	cmn_err(CE_NOTE, "rfs41_rs_erase: (%s, dsp = 0x%p)", who, dsp);
   4988 #endif	/* DEBUG_VERBOSE */
   4989 
   4990 	ASSERT(dsp != NULL);
   4991 	if (dsp->rds_rs.refcnt > 0) {
   4992 		rfs41_deleg_rs_rele(dsp);
   4993 		rfs4_deleg_state_rele(dsp);
   4994 	}
   4995 }
   4996 
   4997 #ifdef	DEBUG
   4998 /*
   4999  * XXX - This is a handy way to force the server to "wait" before
   5000  *	granting a delegation to the requesting client (thereby
   5001  *	forcing the CB_RACE condition). rsec == # of secs to wait.
   5002  */
   5003 int	rsec = 0;
   5004 #endif
   5005 
   5006 /*ARGSUSED*/
   5007 static void
   5008 mds_do_open(struct compound_state *cs, struct svc_req *req,
   5009     rfs4_openowner_t *oo, delegreq_t deleg, uint32_t access, uint32_t deny,
   5010     OPEN4res *resp, int deleg_cur, mds_layout_t *plo)
   5011 {
   5012 	rfs4_state_t *sp;
   5013 	rfs4_file_t *fp;
   5014 	bool_t screate = TRUE;
   5015 	bool_t fcreate = TRUE;
   5016 	uint32_t amodes;
   5017 	uint32_t dmodes;
   5018 	rfs4_deleg_state_t *dsp;
   5019 	sysid_t sysid;
   5020 	nfsstat4 status;
   5021 	caller_context_t ct;
   5022 	int fflags = 0;
   5023 	int recall = 0;
   5024 	int err;
   5025 	int first_open;
   5026 
   5027 	/* get the file struct and hold a lock on it during initial open */
   5028 	fp = rfs4_findfile_withlock(cs->instp, cs->vp, &cs->fh, &fcreate);
   5029 	if (fp == NULL) {
   5030 		DTRACE_PROBE(nfss__e__no_file);
   5031 		resp->status = NFS4ERR_SERVERFAULT;
   5032 		return;
   5033 	}
   5034 
   5035 	sp = rfs4_findstate_by_owner_file(cs, oo, fp, &screate);
   5036 	if (sp == NULL) {
   5037 		DTRACE_PROBE(nfss__e__no_state);
   5038 		resp->status = NFS4ERR_RESOURCE;
   5039 		/* No need to keep any reference */
   5040 		rfs4_file_rele_withunlock(fp);
   5041 		return;
   5042 	}
   5043 
   5044 	/* try to get the sysid before continuing */
   5045 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
   5046 		resp->status = status;
   5047 		rfs4_file_rele(fp);
   5048 		/* Not a fully formed open; "close" it */
   5049 		if (screate == TRUE)
   5050 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   5051 		rfs4_state_rele(sp);
   5052 		return;
   5053 	}
   5054 
   5055 	/*
   5056 	 * Assign the layout if there is one
   5057 	 * Note that this means the file was just created.
   5058 	 */
   5059 	if (plo) {
   5060 		ASSERT(fp->rf_mlo == NULL);
   5061 		if (fp->rf_mlo) {
   5062 			rfs4_dbe_rele(fp->rf_mlo->mlo_dbe);
   5063 		}
   5064 
   5065 		fp->rf_mlo = plo;
   5066 	}
   5067 
   5068 	/* Calculate the fflags for this OPEN */
   5069 	if (access & OPEN4_SHARE_ACCESS_READ)
   5070 		fflags |= FREAD;
   5071 	if (access & OPEN4_SHARE_ACCESS_WRITE)
   5072 		fflags |= FWRITE;
   5073 
   5074 	rfs4_dbe_lock(sp->rs_dbe);
   5075 
   5076 	/*
   5077 	 * Calculate the new deny and access mode that this open is adding to
   5078 	 * the file for this open owner;
   5079 	 */
   5080 	dmodes = (deny & ~sp->rs_share_deny);
   5081 	amodes = (access & ~sp->rs_share_access);
   5082 
   5083 	first_open = (sp->rs_share_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
   5084 
   5085 	/*
   5086 	 * Check to see the client has already sent an open for this
   5087 	 * open owner on this file with the same share/deny modes.
   5088 	 * If so, we don't need to check for a conflict and we don't
   5089 	 * need to add another shrlock.  If not, then we need to
   5090 	 * check for conflicts in deny and access before checking for
   5091 	 * conflicts in delegation.  We don't want to recall a
   5092 	 * delegation based on an open that will eventually fail based
   5093 	 * on shares modes.
   5094 	 */
   5095 
   5096 	if (dmodes || amodes) {
   5097 		if ((err = rfs4_share(sp, access, deny)) != 0) {
   5098 			rfs4_dbe_unlock(sp->rs_dbe);
   5099 			resp->status = err;
   5100 
   5101 			rfs4_file_rele(fp);
   5102 			/* Not a fully formed open; "close" it */
   5103 			if (screate == TRUE)
   5104 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   5105 			rfs4_state_rele(sp);
   5106 			return;
   5107 		}
   5108 	}
   5109 
   5110 	rfs4_dbe_lock(fp->rf_dbe);
   5111 
   5112 	/*
   5113 	 * Check to see if this file is delegated and if so, if a
   5114 	 * recall needs to be done.
   5115 	 * This only checke the delegations for this instance.  If another
   5116 	 * instance has a delegation for this file, then the conflict
   5117 	 * detection will be done in the monitor on OPEN.  We just need to
   5118 	 * check if we have a delegation and if the calling client is the
   5119 	 * owner.  The monitor doesn't have enough info to determine if the
   5120 	 * caller is the owner of the delegation or not.
   5121 	 */
   5122 	if (rfs4_check_recall(sp, access)) {
   5123 		rfs4_dbe_unlock(fp->rf_dbe);
   5124 		rfs4_dbe_unlock(sp->rs_dbe);
   5125 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
   5126 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
   5127 		rfs4_dbe_lock(sp->rs_dbe);
   5128 
   5129 		/* if state closed while lock was dropped */
   5130 		if (sp->rs_closed) {
   5131 			if (dmodes || amodes)
   5132 				(void) rfs4_unshare(sp);
   5133 			rfs4_dbe_unlock(sp->rs_dbe);
   5134 			rfs4_file_rele(fp);
   5135 			/* Not a fully formed open; "close" it */
   5136 			if (screate == TRUE)
   5137 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   5138 			rfs4_state_rele(sp);
   5139 			resp->status = NFS4ERR_OLD_STATEID;
   5140 			return;
   5141 		}
   5142 
   5143 		rfs4_dbe_lock(fp->rf_dbe);
   5144 		/* Let's see if the delegation was returned */
   5145 		if (rfs4_check_recall(sp, access)) {
   5146 			rfs4_dbe_unlock(fp->rf_dbe);
   5147 			if (dmodes || amodes)
   5148 				(void) rfs4_unshare(sp);
   5149 			rfs4_dbe_unlock(sp->rs_dbe);
   5150 			rfs4_file_rele(fp);
   5151 			rfs4_update_lease(sp->rs_owner->ro_client);
   5152 
   5153 			/* Not a fully formed open; "close" it */
   5154 			if (screate == TRUE)
   5155 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   5156 			rfs4_state_rele(sp);
   5157 			resp->status = NFS4ERR_DELAY;
   5158 			return;
   5159 		}
   5160 	}
   5161 
   5162 	/*
   5163 	 * the share check passed and any delegation conflict has been
   5164 	 * taken care of, now call vop_open.
   5165 	 * if this is the first open then call vop_open with fflags.
   5166 	 * if not, call vn_open_upgrade with just the upgrade flags.
   5167 	 *
   5168 	 * if the file has been opened already, it will have the current
   5169 	 * access mode in the state struct.  if it has no share access, then
   5170 	 * this is a new open.
   5171 	 *
   5172 	 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
   5173 	 * call VOP_OPEN(), just do the open upgrade.
   5174 	 */
   5175 	if (first_open && !deleg_cur) {
   5176 		ct.cc_sysid = sysid;
   5177 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
   5178 		ct.cc_caller_id = cs->instp->caller_id;
   5179 		ct.cc_flags = CC_DONTBLOCK;
   5180 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
   5181 		if (err) {
   5182 			rfs4_dbe_unlock(fp->rf_dbe);
   5183 			if (dmodes || amodes)
   5184 				(void) rfs4_unshare(sp);
   5185 			rfs4_dbe_unlock(sp->rs_dbe);
   5186 			rfs4_file_rele(fp);
   5187 
   5188 			/* Not a fully formed open; "close" it */
   5189 			if (screate == TRUE)
   5190 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   5191 			rfs4_state_rele(sp);
   5192 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
   5193 				resp->status = NFS4ERR_DELAY;
   5194 			else
   5195 				resp->status = NFS4ERR_SERVERFAULT;
   5196 			return;
   5197 		}
   5198 	} else { /* open upgrade */
   5199 		/*
   5200 		 * calculate the fflags for the new mode that is being added
   5201 		 * by this upgrade.
   5202 		 */
   5203 		fflags = 0;
   5204 		if (amodes & OPEN4_SHARE_ACCESS_READ)
   5205 			fflags |= FREAD;
   5206 		if (amodes & OPEN4_SHARE_ACCESS_WRITE)
   5207 			fflags |= FWRITE;
   5208 		vn_open_upgrade(cs->vp, fflags);
   5209 	}
   5210 	sp->rs_opened = TRUE;
   5211 
   5212 	if (dmodes & OPEN4_SHARE_DENY_READ)
   5213 		fp->rf_deny_read++;
   5214 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
   5215 		fp->rf_deny_write++;
   5216 	fp->rf_share_deny |= deny;
   5217 
   5218 	if (amodes & OPEN4_SHARE_ACCESS_READ)
   5219 		fp->rf_access_read++;
   5220 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
   5221 		fp->rf_access_write++;
   5222 	fp->rf_share_access |= access;
   5223 
   5224 	/*
   5225 	 * Check for delegation here. if the deleg argument is not
   5226 	 * DELEG_ANY, then this is a reclaim from a client and
   5227 	 * we must honor the delegation requested. If necessary we can
   5228 	 * set the recall flag.
   5229 	 */
   5230 	dsp = rfs4_grant_delegation(cs, deleg, sp, &recall);
   5231 
   5232 	cs->deleg = (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE);
   5233 
   5234 	next_stateid(&sp->rs_stateid);
   5235 
   5236 	resp->stateid = sp->rs_stateid.stateid;
   5237 
   5238 	rfs4_dbe_unlock(fp->rf_dbe);
   5239 	rfs4_dbe_unlock(sp->rs_dbe);
   5240 
   5241 	if (dsp) {
   5242 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
   5243 		rfs41_rs_record(cs, DELEGID, dsp);
   5244 		rfs4_deleg_state_rele(dsp);
   5245 #ifdef	DEBUG
   5246 		if (rsec) {
   5247 			/* add delay here to force CB_RACE; rick */
   5248 			delay(SEC_TO_TICK(rsec));
   5249 		}
   5250 #endif
   5251 	}
   5252 
   5253 	rfs4_file_rele(fp);
   5254 	rfs4_state_rele(sp);
   5255 
   5256 	resp->status = NFS4_OK;
   5257 }
   5258 
   5259 nfsstat4
   5260 mds_lookupfile(component4 *component, struct svc_req *req,
   5261 		struct compound_state *cs, uint32_t access,
   5262 		change_info4 *cinfo)
   5263 {
   5264 	nfsstat4 status;
   5265 	char *nm;
   5266 	uint32_t len;
   5267 	vnode_t *dvp = cs->vp;
   5268 	vattr_t bva, ava, fva;
   5269 	int error;
   5270 
   5271 	if (dvp == NULL) {
   5272 		return (NFS4ERR_NOFILEHANDLE);
   5273 	}
   5274 
   5275 	if (dvp->v_type != VDIR) {
   5276 		return (NFS4ERR_NOTDIR);
   5277 	}
   5278 
   5279 	if (!utf8_dir_verify(component))
   5280 		return (NFS4ERR_INVAL);
   5281 
   5282 	nm = utf8_to_fn(component, &len, NULL);
   5283 	if (nm == NULL) {
   5284 		return (NFS4ERR_INVAL);
   5285 	}
   5286 
   5287 	if (len > MAXNAMELEN) {
   5288 		kmem_free(nm, len);
   5289 		return (NFS4ERR_NAMETOOLONG);
   5290 	}
   5291 
   5292 	/* Get "before" change value */
   5293 	bva.va_mask = AT_CTIME|AT_SEQ;
   5294 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   5295 	if (error)
   5296 		return (puterrno4(error));
   5297 
   5298 	/* mds_lookup may VN_RELE directory */
   5299 	VN_HOLD(dvp);
   5300 
   5301 	status = mds_do_lookup(nm, len, req, cs);
   5302 
   5303 	kmem_free(nm, len);
   5304 
   5305 	if (status != NFS4_OK) {
   5306 		VN_RELE(dvp);
   5307 		return (status);
   5308 	}
   5309 
   5310 	/*
   5311 	 * Get "after" change value, if it fails, simply return the
   5312 	 * before value.
   5313 	 */
   5314 	ava.va_mask = AT_CTIME|AT_SEQ;
   5315 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   5316 		ava.va_ctime = bva.va_ctime;
   5317 		ava.va_seq = 0;
   5318 	}
   5319 	VN_RELE(dvp);
   5320 
   5321 	/*
   5322 	 * Validate the file is a file
   5323 	 */
   5324 	fva.va_mask = AT_TYPE|AT_MODE;
   5325 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
   5326 	if (error)
   5327 		return (puterrno4(error));
   5328 
   5329 	if (fva.va_type != VREG) {
   5330 		if (fva.va_type == VDIR)
   5331 			return (NFS4ERR_ISDIR);
   5332 		if (fva.va_type == VLNK)
   5333 			return (NFS4ERR_SYMLINK);
   5334 		return (NFS4ERR_INVAL);
   5335 	}
   5336 
   5337 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
   5338 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   5339 
   5340 	/*
   5341 	 * It is undefined if VOP_LOOKUP will change va_seq, so
   5342 	 * cinfo.atomic = TRUE only if we have
   5343 	 * non-zero va_seq's, and they have not changed.
   5344 	 */
   5345 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
   5346 		cinfo->atomic = TRUE;
   5347 	else
   5348 		cinfo->atomic = FALSE;
   5349 
   5350 	/* Check for mandatory locking */
   5351 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
   5352 	return (check_open_access(access, cs, req));
   5353 }
   5354 
   5355 /*ARGSUSED*/
   5356 static void
   5357 mds_do_opennull(struct compound_state *cs,
   5358 		struct svc_req *req,
   5359 		OPEN4args *args,
   5360 		rfs4_openowner_t *oo,
   5361 		OPEN4res *resp)
   5362 {
   5363 	change_info4 	*cinfo = &resp->cinfo;
   5364 	attrmap4 	*attrset = &resp->attrset;
   5365 
   5366 	mds_layout_t	*plo = NULL;
   5367 
   5368 	if (args->opentype == OPEN4_NOCREATE)
   5369 		resp->status = mds_lookupfile(&args->open_claim4_u.file,
   5370 		    req, cs, (args->share_access & 0xff), cinfo);
   5371 	else {
   5372 		/* inhibit delegation grants during exclusive create */
   5373 
   5374 		if (args->mode == EXCLUSIVE4)
   5375 			rfs4_disable_delegation(cs->instp);
   5376 
   5377 		/*
   5378 		 * Create the file and get the layout.
   5379 		 */
   5380 		resp->status = mds_createfile(args, req, cs, cinfo,
   5381 		    attrset, &plo);
   5382 	}
   5383 
   5384 	if (resp->status == NFS4_OK) {
   5385 
   5386 		/* cs->vp and cs->fh now references the desired file */
   5387 		mds_do_open(cs, req, oo, do_41_deleg_hack(args->share_access),
   5388 		    (args->share_access & 0xff), args->share_deny, resp,
   5389 		    0, plo);
   5390 
   5391 		/*
   5392 		 * If rfs4_createfile set attrset, we must
   5393 		 * clear this attrset before the response is copied.
   5394 		 */
   5395 		if (resp->status != NFS4_OK)
   5396 			resp->attrset =
   5397 			    NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   5398 	} else
   5399 		*cs->statusp = resp->status;
   5400 
   5401 	if (args->mode == EXCLUSIVE4)
   5402 		rfs4_enable_delegation(cs->instp);
   5403 }
   5404 
   5405 /*ARGSUSED*/
   5406 static void
   5407 mds_do_openprev(struct compound_state *cs, struct svc_req *req,
   5408 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   5409 {
   5410 	change_info4 *cinfo = &resp->cinfo;
   5411 	vattr_t va;
   5412 	vtype_t v_type = cs->vp->v_type;
   5413 	int error = 0;
   5414 	caller_context_t ct;
   5415 
   5416 	/* Verify that we have a regular file */
   5417 	if (v_type != VREG) {
   5418 		if (v_type == VDIR)
   5419 			resp->status = NFS4ERR_ISDIR;
   5420 		else if (v_type == VLNK)
   5421 			resp->status = NFS4ERR_SYMLINK;
   5422 		else
   5423 			resp->status = NFS4ERR_INVAL;
   5424 		return;
   5425 	}
   5426 
   5427 	ct.cc_sysid = 0;
   5428 	ct.cc_pid = 0;
   5429 	ct.cc_caller_id = cs->instp->caller_id;
   5430 	ct.cc_flags = CC_DONTBLOCK;
   5431 
   5432 	va.va_mask = AT_MODE|AT_UID;
   5433 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, &ct);
   5434 	if (error) {
   5435 		resp->status = puterrno4(error);
   5436 		return;
   5437 	}
   5438 
   5439 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
   5440 
   5441 	/*
   5442 	 * Check if we have access to the file, Note the the file
   5443 	 * could have originally been open UNCHECKED or GUARDED
   5444 	 * with mode bits that will now fail, but there is nothing
   5445 	 * we can really do about that except in the case that the
   5446 	 * owner of the file is the one requesting the open.
   5447 	 */
   5448 	if (crgetuid(cs->cr) != va.va_uid) {
   5449 		resp->status = check_open_access((args->share_access & 0xff),
   5450 		    cs, req);
   5451 		if (resp->status != NFS4_OK) {
   5452 			return;
   5453 		}
   5454 	}
   5455 
   5456 	/*
   5457 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
   5458 	 */
   5459 	cinfo->before = 0;
   5460 	cinfo->after = 0;
   5461 	cinfo->atomic = FALSE;
   5462 
   5463 	mds_do_open(cs, req, oo,
   5464 	    NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
   5465 	    (args->share_access && 0xff), args->share_deny, resp, 0, NULL);
   5466 }
   5467 
   5468 static void
   5469 mds_do_opendelcur(struct compound_state *cs, struct svc_req *req,
   5470 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   5471 {
   5472 	int error;
   5473 	nfsstat4 status;
   5474 	stateid4 stateid =
   5475 	    args->open_claim4_u.delegate_cur_info.delegate_stateid;
   5476 	rfs4_deleg_state_t *dsp;
   5477 
   5478 	/*
   5479 	 * Find the state info from the stateid and confirm that the
   5480 	 * file is delegated.  If the state openowner is the same as
   5481 	 * the supplied openowner we're done. If not, get the file
   5482 	 * info from the found state info. Use that file info to
   5483 	 * create the state for this lock owner. Note solaris doen't
   5484 	 * really need the pathname to find the file. We may want to
   5485 	 * lookup the pathname and make sure that the vp exist and
   5486 	 * matches the vp in the file structure. However it is
   5487 	 * possible that the pathname nolonger exists (local process
   5488 	 * unlinks the file), so this may not be that useful.
   5489 	 */
   5490 
   5491 	status = rfs4_get_deleg_state(cs, &stateid, &dsp);
   5492 	if (status != NFS4_OK) {
   5493 		resp->status = status;
   5494 		return;
   5495 	}
   5496 
   5497 	ASSERT(dsp->rds_finfo->rf_dinfo->rd_dtype != OPEN_DELEGATE_NONE);
   5498 
   5499 	/*
   5500 	 * New lock owner, create state. Since this was probably called
   5501 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
   5502 	 */
   5503 
   5504 	ASSERT(cs->vp != NULL);
   5505 	VN_RELE(cs->vp);
   5506 	VN_HOLD(dsp->rds_finfo->rf_vp);
   5507 	cs->vp = dsp->rds_finfo->rf_vp;
   5508 
   5509 	if (error = mknfs41_fh(&cs->fh, cs->vp, cs->exi)) {
   5510 		rfs4_deleg_state_rele(dsp);
   5511 		*cs->statusp = resp->status = puterrno4(error);
   5512 		return;
   5513 	}
   5514 
   5515 	/* Mark progress for delegation returns */
   5516 	dsp->rds_finfo->rf_dinfo->rd_time_lastwrite = gethrestime_sec();
   5517 	rfs4_deleg_state_rele(dsp);
   5518 	mds_do_open(cs, req, oo, DELEG_NONE,
   5519 	    (args->share_access & 0xff),
   5520 	    args->share_deny, resp, 1, NULL);
   5521 }
   5522 
   5523 /*ARGSUSED*/
   5524 static void
   5525 mds_do_opendelprev(struct compound_state *cs, struct svc_req *req,
   5526 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   5527 {
   5528 	/*
   5529 	 * Lookup the pathname, it must already exist since this file
   5530 	 * was delegated.
   5531 	 *
   5532 	 * Find the file and state info for this vp and open owner pair.
   5533 	 *	check that they are in fact delegated.
   5534 	 *	check that the state access and deny modes are the same.
   5535 	 *
   5536 	 * Return the delgation possibly seting the recall flag.
   5537 	 */
   5538 	rfs4_file_t *fp;
   5539 	rfs4_state_t *sp;
   5540 	bool_t create = FALSE;
   5541 	bool_t dcreate = FALSE;
   5542 	rfs4_deleg_state_t *dsp;
   5543 	nfsace4 *ace;
   5544 
   5545 
   5546 	/* Note we ignore oflags */
   5547 	resp->status = mds_lookupfile(&args->open_claim4_u.file_delegate_prev,
   5548 	    req, cs, (args->share_access & 0xff), &resp->cinfo);
   5549 	if (resp->status != NFS4_OK) {
   5550 		return;
   5551 	}
   5552 
   5553 	/* get the file struct and hold a lock on it during initial open */
   5554 	fp = rfs4_findfile_withlock(cs->instp, cs->vp, NULL, &create);
   5555 	if (fp == NULL) {
   5556 		DTRACE_PROBE(nfss__e__no_file);
   5557 		resp->status = NFS4ERR_SERVERFAULT;
   5558 		return;
   5559 	}
   5560 
   5561 	sp = rfs4_findstate_by_owner_file(cs, oo, fp, &create);
   5562 	if (sp == NULL) {
   5563 		DTRACE_PROBE(nfss__e__no_state);
   5564 		resp->status = NFS4ERR_SERVERFAULT;
   5565 		rfs4_file_rele_withunlock(fp);
   5566 		return;
   5567 	}
   5568 
   5569 	rfs4_dbe_lock(sp->rs_dbe);
   5570 	rfs4_dbe_lock(fp->rf_dbe);
   5571 	if ((args->share_access & 0xff) != sp->rs_share_access ||
   5572 	    args->share_deny != sp->rs_share_deny ||
   5573 	    sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) {
   5574 		DTRACE_PROBE2(nfss__e__state_mixup, rfs4_state_t *, sp,
   5575 		    OPEN4args *, args);
   5576 		rfs4_dbe_unlock(fp->rf_dbe);
   5577 		rfs4_dbe_unlock(sp->rs_dbe);
   5578 		rfs4_file_rele(fp);
   5579 		rfs4_state_rele(sp);
   5580 		resp->status = NFS4ERR_SERVERFAULT;
   5581 		return;
   5582 	}
   5583 	rfs4_dbe_unlock(fp->rf_dbe);
   5584 	rfs4_dbe_unlock(sp->rs_dbe);
   5585 
   5586 	dsp = rfs4_finddeleg(cs, sp, &dcreate);
   5587 	if (dsp == NULL) {
   5588 		DTRACE_PROBE(nfss__e__no_deleg);
   5589 		rfs4_state_rele(sp);
   5590 		rfs4_file_rele(fp);
   5591 		resp->status = NFS4ERR_SERVERFAULT;
   5592 		return;
   5593 	}
   5594 
   5595 	next_stateid(&sp->rs_stateid);
   5596 
   5597 	resp->stateid = sp->rs_stateid.stateid;
   5598 
   5599 	resp->delegation.delegation_type = dsp->rds_dtype;
   5600 
   5601 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
   5602 		open_read_delegation4 *rv =
   5603 		    &resp->delegation.open_delegation4_u.read;
   5604 
   5605 		rv->stateid = dsp->rds_delegid.stateid;
   5606 		rv->recall = FALSE; /* no policy in place to set to TRUE */
   5607 		ace = &rv->permissions;
   5608 	} else {
   5609 		open_write_delegation4 *rv =
   5610 		    &resp->delegation.open_delegation4_u.write;
   5611 
   5612 		rv->stateid = dsp->rds_delegid.stateid;
   5613 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
   5614 		ace = &rv->permissions;
   5615 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
   5616 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
   5617 	}
   5618 
   5619 	/* XXX For now */
   5620 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
   5621 	ace->flag = 0;
   5622 	ace->access_mask = 0;
   5623 	ace->who.utf8string_len = 0;
   5624 	ace->who.utf8string_val = 0;
   5625 
   5626 	rfs4_deleg_state_rele(dsp);
   5627 	rfs4_state_rele(sp);
   5628 	rfs4_file_rele(fp);
   5629 }
   5630 
   5631 static void
   5632 mds_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
   5633 	    struct svc_req *req, compound_state_t *cs)
   5634 {
   5635 	OPEN4args		*args = &argop->nfs_argop4_u.opopen;
   5636 	OPEN4res		*resp = &resop->nfs_resop4_u.opopen;
   5637 	open_owner4		*owner = &args->owner;
   5638 	open_claim_type4	claim = args->claim;
   5639 	rfs4_client_t		*cp;
   5640 	rfs4_openowner_t	*oo;
   5641 	bool_t			create;
   5642 	int			can_reclaim;
   5643 	int			share_access;
   5644 
   5645 	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
   5646 	    OPEN4args *, args);
   5647 
   5648 	if (cs->vp == NULL) {
   5649 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5650 		goto final;
   5651 	}
   5652 
   5653 	cp = cs->cp;
   5654 	owner->clientid = cp->rc_clientid;
   5655 	can_reclaim = cp->rc_can_reclaim;
   5656 
   5657 retry:
   5658 	create = TRUE;
   5659 	oo = mds_findopenowner(cs->instp, owner, &create);
   5660 	if (oo == NULL) {
   5661 		/* XXX: this seems a little fishy... */
   5662 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
   5663 		goto final;
   5664 	}
   5665 
   5666 	/* Need to serialize access to the stateid space */
   5667 	rfs4_sw_enter(&oo->ro_sw);
   5668 
   5669 	/* Grace only applies to regular-type OPENs */
   5670 	if (rfs4_clnt_in_grace(cp) &&
   5671 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
   5672 		*cs->statusp = resp->status = NFS4ERR_GRACE;
   5673 		goto out;
   5674 	}
   5675 
   5676 	/*
   5677 	 * If previous state at the server existed then can_reclaim
   5678 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
   5679 	 * client.
   5680 	 */
   5681 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
   5682 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
   5683 		goto out;
   5684 	}
   5685 
   5686 	/*
   5687 	 * Reject the open if the client has missed the grace period
   5688 	 */
   5689 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
   5690 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
   5691 		goto out;
   5692 	}
   5693 
   5694 	/*
   5695 	 * OPEN_CONFIRM is mandatory not to impl in 4.1.
   5696 	 */
   5697 	oo->ro_need_confirm = FALSE;
   5698 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
   5699 
   5700 	/*
   5701 	 * If there is an unshared filesystem mounted on this vnode,
   5702 	 * do not allow to open/create in this directory.
   5703 	 */
   5704 	if (vn_ismntpt(cs->vp)) {
   5705 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5706 		goto out;
   5707 	}
   5708 
   5709 	share_access = (args->share_access && 0xff);
   5710 
   5711 	/*
   5712 	 * access must READ, WRITE, or BOTH.  No access is invalid.
   5713 	 * deny can be READ, WRITE, BOTH, or NONE.
   5714 	 * bits not defined for access/deny are invalid.
   5715 	 */
   5716 	if (! (share_access & OPEN4_SHARE_ACCESS_BOTH) ||
   5717 	    (share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
   5718 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
   5719 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   5720 		goto out;
   5721 	}
   5722 
   5723 	/*
   5724 	 * make sure attrset is zero before response is built.
   5725 	 */
   5726 	resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   5727 
   5728 	switch (claim) {
   5729 	case CLAIM_NULL:
   5730 		mds_do_opennull(cs, req, args, oo, resp);
   5731 		break;
   5732 	case CLAIM_PREVIOUS:
   5733 		mds_do_openprev(cs, req, args, oo, resp);
   5734 		break;
   5735 	case CLAIM_DELEGATE_CUR:
   5736 		mds_do_opendelcur(cs, req, args, oo, resp);
   5737 		break;
   5738 	case CLAIM_DELEGATE_PREV:
   5739 		mds_do_opendelprev(cs, req, args, oo, resp);
   5740 		break;
   5741 	/*  OTHER CLAIM TYPES !!! */
   5742 	default:
   5743 		resp->status = NFS4ERR_INVAL;
   5744 		break;
   5745 	}
   5746 
   5747 out:
   5748 	switch (resp->status) {
   5749 	case NFS4ERR_BADXDR:
   5750 	case NFS4ERR_BAD_SEQID:
   5751 	case NFS4ERR_BAD_STATEID:
   5752 	case NFS4ERR_NOFILEHANDLE:
   5753 	case NFS4ERR_RESOURCE:
   5754 	case NFS4ERR_STALE_CLIENTID:
   5755 	case NFS4ERR_STALE_STATEID:
   5756 		/*
   5757 		 * The protocol states that if any of these errors are
   5758 		 * being returned, the sequence id should not be
   5759 		 * incremented.  Any other return requires an
   5760 		 * increment.
   5761 		 */
   5762 		break;
   5763 	}
   5764 	*cs->statusp = resp->status;
   5765 	rfs4_sw_exit(&oo->ro_sw);
   5766 	rfs4_openowner_rele(oo);
   5767 
   5768 final:
   5769 	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
   5770 	    OPEN4res *, resp);
   5771 }
   5772 
/*
 * mds_free_reply: release function for an operation result.
 *
 * Hands resop to rfs4_free_reply(); the compound state argument is not
 * used.
 */
/*ARGSUSED*/
static void
mds_free_reply(nfs_resop4 *resop, compound_state_t *cs)
{
	/* Common function for NFSv4.0 and NFSv4.1 */
	rfs4_free_reply(resop);
}
   5780 
   5781 /*ARGSUSED*/
   5782 void
   5783 mds_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
   5784 		    struct svc_req *req, compound_state_t *cs)
   5785 {
   5786 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
   5787 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
   5788 	uint32_t access = (args->share_access & 0xff);
   5789 	uint32_t deny = args->share_deny;
   5790 	nfsstat4 status;
   5791 	rfs4_state_t *sp;
   5792 	rfs4_file_t *fp;
   5793 	int fflags = 0;
   5794 	int rc;
   5795 
   5796 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
   5797 	    OPEN_DOWNGRADE4args *, args);
   5798 
   5799 	if (cs->vp == NULL) {
   5800 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5801 		goto final;
   5802 	}
   5803 
   5804 	if (mds_strict_seqid && args->seqid) {
   5805 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   5806 		goto final;
   5807 	}
   5808 
   5809 	status = rfs4_get_state(cs, &args->open_stateid, &sp, RFS4_DBS_VALID);
   5810 	if (status != NFS4_OK) {
   5811 		*cs->statusp = resp->status = status;
   5812 		goto final;
   5813 	}
   5814 
   5815 	/* Ensure specified filehandle matches */
   5816 	if (cs->vp != sp->rs_finfo->rf_vp) {
   5817 		rfs4_state_rele(sp);
   5818 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   5819 		goto final;
   5820 	}
   5821 
   5822 	/* hold off other access to open_owner while we tinker */
   5823 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
   5824 
   5825 	rc = mds_check_stateid_seqid(sp, &args->open_stateid);
   5826 	switch (rc) {
   5827 	case NFS4_CHECK_STATEID_OKAY:
   5828 		break;
   5829 	case NFS4_CHECK_STATEID_OLD:
   5830 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   5831 		goto end;
   5832 	case NFS4_CHECK_STATEID_BAD:
   5833 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   5834 		goto end;
   5835 	case NFS4_CHECK_STATEID_EXPIRED:
   5836 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   5837 		goto end;
   5838 	case NFS4_CHECK_STATEID_CLOSED:
   5839 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   5840 		goto end;
   5841 	case NFS4_CHECK_STATEID_UNCONFIRMED:
   5842 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   5843 		goto end;
   5844 	case NFS4_CHECK_STATEID_REPLAY:
   5845 		ASSERT(0);
   5846 		break;
   5847 	default:
   5848 		ASSERT(FALSE);
   5849 		break;
   5850 	}
   5851 
   5852 	rfs4_dbe_lock(sp->rs_dbe);
   5853 	/*
   5854 	 * Check that the new access modes and deny modes are valid.
   5855 	 * Check that no invalid bits are set.
   5856 	 */
   5857 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
   5858 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
   5859 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   5860 		rfs4_dbe_unlock(sp->rs_dbe);
   5861 		goto end;
   5862 	}
   5863 
   5864 	/*
   5865 	 * The new modes must be a subset of the current modes and
   5866 	 * the access must specify at least one mode. To test that
   5867 	 * the new mode is a subset of the current modes we bitwise
   5868 	 * AND them together and check that the result equals the new
   5869 	 * mode. For example:
   5870 	 * New mode, access == R and current mode, sp->share_access  == RW
   5871 	 * access & sp->share_access == R == access, so the new access mode
   5872 	 * is valid. Consider access == RW, sp->share_access = R
   5873 	 * access & sp->share_access == R != access, so the new access mode
   5874 	 * is invalid.
   5875 	 */
   5876 	if ((access & sp->rs_share_access) != access ||
   5877 	    (deny & sp->rs_share_deny) != deny ||
   5878 	    (access &
   5879 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
   5880 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   5881 		rfs4_dbe_unlock(sp->rs_dbe);
   5882 		goto end;
   5883 	}
   5884 
   5885 	/*
   5886 	 * Release any share locks associated with this stateID.
   5887 	 * Strictly speaking, this violates the spec because the
   5888 	 * spec effectively requires that open downgrade be atomic.
   5889 	 * At present, fs_shrlock does not have this capability.
   5890 	 */
   5891 	rfs4_unshare(sp);
   5892 
   5893 	fp = sp->rs_finfo;
   5894 	rfs4_dbe_lock(fp->rf_dbe);
   5895 
   5896 	/*
   5897 	 * If the current mode has deny read and the new mode
   5898 	 * does not, decrement the number of deny read mode bits
   5899 	 * and if it goes to zero turn off the deny read bit
   5900 	 * on the file.
   5901 	 */
   5902 	if ((sp->rs_share_deny & OPEN4_SHARE_DENY_READ) &&
   5903 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
   5904 		fp->rf_deny_read--;
   5905 		if (fp->rf_deny_read == 0)
   5906 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
   5907 	}
   5908 
   5909 	/*
   5910 	 * If the current mode has deny write and the new mode
   5911 	 * does not, decrement the number of deny write mode bits
   5912 	 * and if it goes to zero turn off the deny write bit
   5913 	 * on the file.
   5914 	 */
   5915 	if ((sp->rs_share_deny & OPEN4_SHARE_DENY_WRITE) &&
   5916 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
   5917 		fp->rf_deny_write--;
   5918 		if (fp->rf_deny_write == 0)
   5919 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
   5920 	}
   5921 
   5922 	/*
   5923 	 * If the current mode has access read and the new mode
   5924 	 * does not, decrement the number of access read mode bits
   5925 	 * and if it goes to zero turn off the access read bit
   5926 	 * on the file. set fflags to FREAD for the call to
   5927 	 * vn_open_downgrade().
   5928 	 */
   5929 	if ((sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) &&
   5930 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
   5931 		fp->rf_access_read--;
   5932 		if (fp->rf_access_read == 0)
   5933 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
   5934 		fflags |= FREAD;
   5935 	}
   5936 
   5937 	/*
   5938 	 * If the current mode has access write and the new mode
   5939 	 * does not, decrement the number of access write mode bits
   5940 	 * and if it goes to zero turn off the access write bit
   5941 	 * on the file. set fflags to FWRITE for the call to
   5942 	 * vn_open_downgrade().
   5943 	 */
   5944 	if ((sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) &&
   5945 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
   5946 		fp->rf_access_write--;
   5947 		if (fp->rf_access_write == 0)
   5948 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
   5949 		fflags |= FWRITE;
   5950 	}
   5951 
   5952 	/* Check that the file is still accessible */
   5953 	ASSERT(fp->rf_share_access);
   5954 
   5955 	rfs4_dbe_unlock(fp->rf_dbe);
   5956 
   5957 	status = rfs4_share(sp, access, deny);
   5958 	rfs4_dbe_unlock(sp->rs_dbe);
   5959 
   5960 	if (status != NFS4_OK) {
   5961 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   5962 		goto end;
   5963 	}
   5964 
   5965 	/*
   5966 	 * we successfully downgraded the share lock, now we need to downgrade
   5967 	 * the open.  it is possible that the downgrade was only for a deny
   5968 	 * mode and we have nothing else to do.
   5969 	 */
   5970 	if ((fflags & (FREAD|FWRITE)) != 0)
   5971 		vn_open_downgrade(cs->vp, fflags);
   5972 
   5973 	rfs4_dbe_lock(sp->rs_dbe);
   5974 
   5975 	/* Update the stateid */
   5976 	next_stateid(&sp->rs_stateid);
   5977 	resp->open_stateid = sp->rs_stateid.stateid;
   5978 
   5979 	rfs4_dbe_unlock(sp->rs_dbe);
   5980 
   5981 	*cs->statusp = resp->status = NFS4_OK;
   5982 	/* Update the lease */
   5983 	rfs4_update_lease(sp->rs_owner->ro_client);
   5984 end:
   5985 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
   5986 	rfs4_state_rele(sp);
   5987 
   5988 final:
   5989 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
   5990 	    OPEN_DOWNGRADE4res *, resp);
   5991 
   5992 }
   5993 
   5994 /*ARGSUSED*/
   5995 void
   5996 mds_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
   5997 	    struct svc_req *req, compound_state_t *cs)
   5998 {
   5999 	/* XXX Currently not using req arg */
   6000 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
   6001 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
   6002 	rfs4_state_t *sp;
   6003 	nfsstat4 status;
   6004 	int rc;
   6005 
   6006 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
   6007 	    CLOSE4args *, args);
   6008 
   6009 	if (cs->vp == NULL) {
   6010 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   6011 		goto final;
   6012 	}
   6013 
   6014 	if (mds_strict_seqid && args->seqid) {
   6015 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   6016 		goto final;
   6017 	}
   6018 
   6019 	status = rfs4_get_state(cs, &args->open_stateid, &sp, RFS4_DBS_INVALID);
   6020 	if (status != NFS4_OK) {
   6021 		*cs->statusp = resp->status = status;
   6022 		goto final;
   6023 	}
   6024 
   6025 	/* Ensure specified filehandle matches */
   6026 	if (cs->vp != sp->rs_finfo->rf_vp) {
   6027 		rfs4_state_rele(sp);
   6028 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   6029 		goto final;
   6030 	}
   6031 
   6032 	/* hold off other access to open_owner while we tinker */
   6033 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
   6034 
   6035 	rc = mds_check_stateid_seqid(sp, &args->open_stateid);
   6036 	switch (rc) {
   6037 	case NFS4_CHECK_STATEID_OKAY:
   6038 		break;
   6039 	case NFS4_CHECK_STATEID_OLD:
   6040 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   6041 		goto end;
   6042 	case NFS4_CHECK_STATEID_BAD:
   6043 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   6044 		goto end;
   6045 	case NFS4_CHECK_STATEID_EXPIRED:
   6046 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   6047 		goto end;
   6048 	case NFS4_CHECK_STATEID_CLOSED:
   6049 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   6050 		goto end;
   6051 	case NFS4_CHECK_STATEID_UNCONFIRMED:
   6052 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   6053 		goto end;
   6054 	case NFS4_CHECK_STATEID_REPLAY:
   6055 		ASSERT(0);
   6056 		break;
   6057 	default:
   6058 		ASSERT(FALSE);
   6059 		break;
   6060 	}
   6061 
   6062 	rfs4_dbe_lock(sp->rs_dbe);
   6063 
   6064 	/* Update the stateid. */
   6065 	next_stateid(&sp->rs_stateid);
   6066 	resp->open_stateid = sp->rs_stateid.stateid;
   6067 
   6068 	rfs4_dbe_unlock(sp->rs_dbe);
   6069 
   6070 	rfs4_update_lease(sp->rs_owner->ro_client);
   6071 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6072 
   6073 	*cs->statusp = resp->status = status;
   6074 
   6075 end:
   6076 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
   6077 	rfs4_state_rele(sp);
   6078 
   6079 final:
   6080 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
   6081 	    CLOSE4res *, resp);
   6082 
   6083 }
   6084 
   6085 /*
   6086  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
   6087  */
   6088 static nfsstat4
   6089 mds_lock_denied(nfs_server_instance_t *instp, LOCK4denied *dp,
   6090     struct flock64 *flk)
   6091 {
   6092 	rfs4_lockowner_t *lo;
   6093 	rfs4_client_t *cp;
   6094 	uint32_t len;
   6095 
   6096 	lo = findlockowner_by_pid(instp, flk->l_pid);
   6097 	if (lo != NULL) {
   6098 		cp = lo->rl_client;
   6099 		if (rfs4_lease_expired(cp)) {
   6100 			rfs4_lockowner_rele(lo);
   6101 			rfs4_dbe_hold(cp->rc_dbe);
   6102 			rfs4_client_close(cp);
   6103 			return (NFS4ERR_EXPIRED);
   6104 		}
   6105 		dp->owner.clientid = lo->rl_owner.clientid;
   6106 		len = lo->rl_owner.owner_len;
   6107 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
   6108 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
   6109 		dp->owner.owner_len = len;
   6110 		rfs4_lockowner_rele(lo);
   6111 		goto finish;
   6112 	}
   6113 
   6114 	/*
   6115 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
   6116 	 * of the client id contain the boot time for a NFS4 lock. So we
   6117 	 * fabricate and identity by setting clientid to the sysid, and
   6118 	 * the lock owner to the pid.
   6119 	 */
   6120 	dp->owner.clientid = flk->l_sysid;
   6121 	len = sizeof (pid_t);
   6122 	dp->owner.owner_len = len;
   6123 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
   6124 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
   6125 finish:
   6126 	dp->offset = flk->l_start;
   6127 	dp->length = flk->l_len;
   6128 
   6129 	if (flk->l_type == F_RDLCK)
   6130 		dp->locktype = READ_LT;
   6131 	else if (flk->l_type == F_WRLCK)
   6132 		dp->locktype = WRITE_LT;
   6133 	else
   6134 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
   6135 
   6136 	return (NFS4_OK);
   6137 }
   6138 
/*
 * mds_op_lock: handler for the LOCK operation.
 *
 *	argop	- operation arguments (LOCK4args)
 *	resop	- operation results (LOCK4res)
 *	req	- RPC request (currently unused)
 *	cs	- compound state; cs->vp must hold the current filehandle
 *
 * There are two ways in, selected by args->locker.new_lock_owner:
 *
 *   new lock owner	- the open stateid is looked up and checked, the
 *			  open owner is serialized (ro_sw), and a lock
 *			  owner plus lock state are found or created.
 *   existing owner	- the lock stateid is looked up and checked.
 *
 * In both cases the lock state is then serialized (rls_sw), the object
 * type and grace-period rules are checked, and rfs4_do_lock() is called.
 * On success the lock stateid is returned and the lock state is marked
 * as having completed a lock.
 *
 * The final status is stored in both resp->status and *cs->statusp.
 */
/*ARGSUSED*/
void
mds_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
	    struct svc_req *req, compound_state_t *cs)
{
	/* XXX Currently not using req arg */
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* set only on the new-owner path */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);


	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		/*
		 * validate that open_seqid, lock_seqid and the
		 * clientid in lock_owner are all zero.
		 */
		if (mds_strict_seqid && (olo->open_seqid ||
		    olo->lock_seqid ||
		    olo->lock_owner.clientid)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto final;
		}

		/*
		 * get/validate the open stateid
		 */
		stateid = &olo->open_stateid;
		status = rfs4_get_state(cs, stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			*cs->statusp = resp->status = status;
			goto final;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto final;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		/*
		 * From here on failures go to "end", which exits ro_sw
		 * and drops the hold on sp.
		 */
		rc = mds_check_stateid_seqid(sp, stateid);
		switch (rc) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
		case NFS4_CHECK_STATEID_REPLAY:
			break;
		}

		/*
		 * Use the clientid4 from the session.
		 *
		 * XXX a quick hack is to plop the clientid4 from the
		 * XXX compound state into the lock_owner structure,
		 * XXX since the _hash and _compare functions use
		 * XXX that field.
		 */
		olo->lock_owner.clientid = cs->cp->rc_clientid;

		lo = findlockowner(cs->instp, &olo->lock_owner, &lcreate);
		if (lo == NULL) {
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = mds_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (create == FALSE) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);
		rfs4_dbe_lock(lsp->rls_dbe);

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		/* The lock owner hold is no longer needed; lsp is kept. */
		rfs4_lockowner_rele(lo);
	} else {
		/*
		 * validate lock_seqid is zero.
		 */
		if (mds_strict_seqid &&
		    args->locker.locker4_u.lock_owner.lock_seqid) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto final;
		}

		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(cs, stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			goto final;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto final;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			/* XXX: rbg -- missing code ? :-)  */
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(0);
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* Non-reclaim locks are refused while the client is in grace. */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* Reclaim in grace is refused when the client may not reclaim. */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Reclaim outside of grace is refused. */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	/*
	 * NOTE(review): lock_seqid is read through the lock_owner arm of
	 * the locker union on both paths, including when new_lock_owner
	 * selected the open_owner arm above -- confirm that
	 * rfs4_do_lock() does not depend on this value.
	 */
	status = rfs4_do_lock(lsp, args->locktype,
	    args->locker.locker4_u.lock_owner.lock_seqid, args->offset,
	    args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;
	}

end:
	/*
	 * Common cleanup: reached with resp->status already set.  Only
	 * the holds that were actually taken are released.
	 */
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

final:
	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);

}
   6423 
/*
 * mds_lock_denied_free: free function for LOCK/LOCKT results.
 *
 * Hands resop to lock_denied_free(); the compound state argument is
 * not used.
 */
/*ARGSUSED*/
static void
mds_lock_denied_free(nfs_resop4 *resop, compound_state_t *cs)
{
	/* Common function for NFSv4.0 and NFSv4.1 */
	lock_denied_free(resop);
}
   6432 
   6433 /*ARGSUSED*/
   6434 void
   6435 mds_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
   6436 	    struct svc_req *req, compound_state_t *cs)
   6437 {
   6438 	/* XXX Currently not using req arg */
   6439 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
   6440 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
   6441 	nfsstat4 status;
   6442 	stateid4 *stateid = &args->lock_stateid;
   6443 	rfs4_lo_state_t *lsp;
   6444 
   6445 	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
   6446 	    LOCKU4args *, args);
   6447 
   6448 
   6449 	if (cs->vp == NULL) {
   6450 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   6451 		goto final;
   6452 	}
   6453 
   6454 	if (mds_strict_seqid && args->seqid) {
   6455 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   6456 		goto final;
   6457 	}
   6458 
   6459 	if ((status = rfs4_get_lo_state(cs, stateid, &lsp, TRUE)) != NFS4_OK) {
   6460 		*cs->statusp = resp->status = status;
   6461 		goto final;
   6462 	}
   6463 
   6464 	/* Ensure specified filehandle matches */
   6465 	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
   6466 		rfs4_lo_state_rele(lsp, TRUE);
   6467 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   6468 		goto final;
   6469 	}
   6470 
   6471 	/* hold off other access to lsp while we tinker */
   6472 	rfs4_sw_enter(&lsp->rls_sw);
   6473 
   6474 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
   6475 	case NFS4_CHECK_STATEID_OKAY:
   6476 		break;
   6477 	case NFS4_CHECK_STATEID_OLD:
   6478 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   6479 		goto end;
   6480 	case NFS4_CHECK_STATEID_BAD:
   6481 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   6482 		goto end;
   6483 	case NFS4_CHECK_STATEID_EXPIRED:
   6484 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   6485 		goto end;
   6486 	case NFS4_CHECK_STATEID_CLOSED:
   6487 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   6488 		goto end;
   6489 	case NFS4_CHECK_STATEID_REPLAY:
   6490 		ASSERT(0);
   6491 		break;
   6492 	default:
   6493 		ASSERT(FALSE);
   6494 		break;
   6495 	}
   6496 
   6497 	rfs4_update_lock_sequence(lsp);
   6498 	rfs4_update_lease(lsp->rls_locker->rl_client);
   6499 
   6500 	/*
   6501 	 * NFS4 only allows locking on regular files, so
   6502 	 * verify type of object.
   6503 	 */
   6504 	if (cs->vp->v_type != VREG) {
   6505 		if (cs->vp->v_type == VDIR)
   6506 			status = NFS4ERR_ISDIR;
   6507 		else
   6508 			status = NFS4ERR_INVAL;
   6509 		goto out;
   6510 	}
   6511 
   6512 	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
   6513 		status = NFS4ERR_GRACE;
   6514 		goto out;
   6515 	}
   6516 
   6517 	status = rfs4_do_lock(lsp, args->locktype,
   6518 	    args->seqid, args->offset, args->length, cs->cr, resop);
   6519 
   6520 out:
   6521 	*cs->statusp = resp->status = status;
   6522 
   6523 	if (status == NFS4_OK)
   6524 		resp->lock_stateid = lsp->rls_lockid.stateid;
   6525 
   6526 end:
   6527 	rfs4_sw_exit(&lsp->rls_sw);
   6528 	rfs4_lo_state_rele(lsp, TRUE);
   6529 
   6530 final:
   6531 	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
   6532 	    LOCKU4res *, resp);
   6533 
   6534 }
   6535 
   6536 /*
 * LOCKT is a best-effort routine; the client cannot be guaranteed that
 * the status returned is still in effect by the time the reply is received.
 * There are numerous race conditions in this routine, but we are not
 * required to be, and cannot be, accurate.
   6541  */
   6542 /*ARGSUSED*/
   6543 void
   6544 mds_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
   6545 	    struct svc_req *req, compound_state_t *cs)
   6546 {
   6547 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
   6548 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
   6549 	rfs4_lockowner_t *lo;
   6550 	bool_t create = FALSE;
   6551 	struct flock64 flk;
   6552 	int error;
   6553 	int flag = FREAD | FWRITE;
   6554 	int ltype;
   6555 	length4 posix_length;
   6556 	sysid_t sysid;
   6557 	pid_t pid;
   6558 	caller_context_t ct;
   6559 
   6560 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
   6561 	    LOCKT4args *, args);
   6562 
   6563 	if (cs->vp == NULL) {
   6564 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   6565 		goto final;
   6566 	}
   6567 
   6568 	args->owner.clientid = cs->cp->rc_clientid;
   6569 
   6570 	ct.cc_sysid = 0;
   6571 	ct.cc_pid = 0;
   6572 	ct.cc_caller_id = cs->instp->caller_id;
   6573 	ct.cc_flags = CC_DONTBLOCK;
   6574 
   6575 	/*
   6576 	 * NFS4 only allows locking on regular files, so
   6577 	 * verify type of object.
   6578 	 */
   6579 	if (cs->vp->v_type != VREG) {
   6580 		if (cs->vp->v_type == VDIR)
   6581 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
   6582 		else
   6583 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
   6584 		goto final;
   6585 	}
   6586 
   6587 	resp->status = NFS4_OK;
   6588 
   6589 	switch (args->locktype) {
   6590 	case READ_LT:
   6591 	case READW_LT:
   6592 		ltype = F_RDLCK;
   6593 		break;
   6594 	case WRITE_LT:
   6595 	case WRITEW_LT:
   6596 		ltype = F_WRLCK;
   6597 		break;
   6598 	}
   6599 
   6600 	posix_length = args->length;
   6601 	/* Check for zero length. To lock to end of file use all ones for V4 */
   6602 	if (posix_length == 0) {
   6603 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   6604 		goto final;
   6605 	} else if (posix_length == (length4)(~0)) {
   6606 		posix_length = 0;	/* Posix to end of file  */
   6607 	}
   6608 
   6609 	/* Find or create a lockowner */
   6610 	lo = findlockowner(cs->instp, &args->owner, &create);
   6611 
   6612 	if (lo) {
   6613 		pid = lo->rl_pid;
   6614 		if ((resp->status =
   6615 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
   6616 		goto out;
   6617 	} else {
   6618 		pid = 0;
   6619 		sysid = cs->instp->lockt_sysid;
   6620 	}
   6621 retry:
   6622 	flk.l_type = ltype;
   6623 	flk.l_whence = 0;		/* SEEK_SET */
   6624 	flk.l_start = args->offset;
   6625 	flk.l_len = posix_length;
   6626 	flk.l_sysid = sysid;
   6627 	flk.l_pid = pid;
   6628 	flag |= F_REMOTELOCK;
   6629 
   6630 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
   6631 	if (flk.l_len < 0 || flk.l_start < 0) {
   6632 		resp->status = NFS4ERR_INVAL;
   6633 		goto out;
   6634 	}
   6635 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
   6636 	    NULL, cs->cr, &ct);
   6637 
   6638 	/*
   6639 	 * N.B. We map error values to nfsv4 errors. This is differrent
   6640 	 * than puterrno4 routine.
   6641 	 */
   6642 	switch (error) {
   6643 	case 0:
   6644 		if (flk.l_type == F_UNLCK)
   6645 			resp->status = NFS4_OK;
   6646 		else {
   6647 			if (mds_lock_denied(cs->instp, &resp->denied, &flk)
   6648 			    == NFS4ERR_EXPIRED)
   6649 				goto retry;
   6650 			resp->status = NFS4ERR_DENIED;
   6651 		}
   6652 		break;
   6653 	case EOVERFLOW:
   6654 		resp->status = NFS4ERR_INVAL;
   6655 		break;
   6656 	case EINVAL:
   6657 		resp->status = NFS4ERR_NOTSUPP;
   6658 		break;
   6659 	default:
   6660 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
   6661 		    error);
   6662 		resp->status = NFS4ERR_SERVERFAULT;
   6663 		break;
   6664 	}
   6665 
   6666 out:
   6667 	if (lo)
   6668 		rfs4_lockowner_rele(lo);
   6669 	*cs->statusp = resp->status;
   6670 
   6671 final:
   6672 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
   6673 	    LOCKT4res *, resp);
   6674 }
   6675 
   6676 /*
   6677  * NFSv4.1 Server Sessions
   6678  */
   6679 
   6680 /* Renew Lease */
   6681 void
   6682 mds_refresh(mds_session_t *sp)
   6683 {
   6684 	rfs4_client_t	*cp;
   6685 
   6686 	ASSERT(sp != NULL && sp->sn_clnt != NULL);
   6687 	rfs4_dbe_lock(sp->sn_dbe);
   6688 	cp = sp->sn_clnt;
   6689 	sp->sn_laccess = gethrestime_sec();
   6690 	rfs4_dbe_unlock(sp->sn_dbe);
   6691 
   6692 	rfs4_dbe_hold(cp->rc_dbe);
   6693 	rfs4_update_lease(cp);
   6694 	rfs4_client_rele(cp);
   6695 }
   6696 
   6697 
   6698 nfsstat4
   6699 mds_lease_chk(mds_session_t *sp)
   6700 {
   6701 	rfs4_client_t	*cp;
   6702 	nfsstat4	 error = NFS4_OK;
   6703 
   6704 	/*
   6705 	 * If the client lease expired, go ahead and invalidate
   6706 	 * all the sessions associated with this clientid.
   6707 	 */
   6708 	ASSERT(sp != NULL && sp->sn_clnt != NULL);
   6709 	cp = sp->sn_clnt;
   6710 
   6711 	if (rfs4_lease_expired(cp)) {
   6712 		error = NFS4ERR_BADSESSION;
   6713 	}
   6714 	return (error);
   6715 }
   6716 
   6717 /*
   6718  * Rudimentary server implementation (XXX - for now)
   6719  */
   6720 void
   6721 mds_get_server_impl_id(EXCHANGE_ID4resok *resp)
   6722 {
   6723 	timestruc_t	 currtime;
   6724 	char		*sol_impl = "Solaris NFSv4.1 Server Implementation";
   6725 	char		*sol_idom = "nfsv41.ietf.org";
   6726 	void		*p;
   6727 	uint_t		 len = 0;
   6728 	nfs_impl_id4	*nip;
   6729 
   6730 	resp->eir_server_impl_id.eir_server_impl_id_len = 1;
   6731 	nip = kmem_zalloc(sizeof (nfs_impl_id4), KM_SLEEP);
   6732 	resp->eir_server_impl_id.eir_server_impl_id_val = nip;
   6733 
   6734 	/* Domain */
   6735 	nip->nii_domain.utf8string_len = len = strlen(sol_idom);
   6736 	p = kmem_zalloc(len * sizeof (char), KM_SLEEP);
   6737 	nip->nii_domain.utf8string_val = p;
   6738 	bcopy(sol_idom, p, len);
   6739 
   6740 	/* Implementation */
   6741 	nip->nii_name.utf8string_len = len = strlen(sol_impl);
   6742 	p = kmem_zalloc(len * sizeof (char), KM_SLEEP);
   6743 	nip->nii_name.utf8string_val = p;
   6744 	bcopy(sol_impl, p, len);
   6745 
   6746 	/* Time */
   6747 	gethrestime(&currtime);
   6748 	(void) nfs4_time_vton(&currtime, &nip->nii_date);
   6749 }
   6750 
   6751 /*
   6752  * Principal handling routines
   6753  */
   6754 void
   6755 rfs4_free_cred_princ(rfs4_client_t *cp)
   6756 {
   6757 	cred_princ_t		*p;
   6758 	rpc_gss_principal_t	 ppl;
   6759 
   6760 	ASSERT(cp != NULL);
   6761 	if ((p = cp->rc_cr_set) == NULL)
   6762 		return;
   6763 
   6764 	switch (p->cp_aflavor) {
   6765 	case AUTH_DES:
   6766 		kmem_free(p->cp_princ, strlen(p->cp_princ) + 1);
   6767 		break;
   6768 
   6769 	case RPCSEC_GSS:
   6770 		ppl = (rpc_gss_principal_t)p->cp_princ;
   6771 		kmem_free(ppl, ppl->len + sizeof (int));
   6772 		break;
   6773 	}
   6774 	kmem_free(p, sizeof (cred_princ_t));
   6775 	cp->rc_cr_set = NULL;
   6776 }
   6777 
   6778 static rpc_gss_principal_t
   6779 rfs4_dup_princ(rpc_gss_principal_t ppl)
   6780 {
   6781 	rpc_gss_principal_t	pdup;
   6782 	int			len;
   6783 
   6784 	if (ppl == NULL)
   6785 		return (NULL);
   6786 
   6787 	len = sizeof (int) + ppl->len;
   6788 	pdup = (rpc_gss_principal_t)kmem_alloc(len, KM_SLEEP);
   6789 	bcopy(ppl, pdup, len);
   6790 	return (pdup);
   6791 }
   6792 
   6793 void
   6794 rfs4_set_cred_princ(cred_princ_t **pp, struct compound_state *cs)
   6795 {
   6796 	cred_princ_t	*p;
   6797 	caddr_t		 t;
   6798 
   6799 	ASSERT(pp != NULL);
   6800 
   6801 
   6802 	if (*pp == NULL)
   6803 		*pp = kmem_zalloc(sizeof (cred_princ_t), KM_SLEEP);
   6804 
   6805 	p = *pp;
   6806 
   6807 	p->cp_cr = crdup(cs->basecr);
   6808 	p->cp_aflavor = cs->req->rq_cred.oa_flavor;
   6809 	p->cp_secmod = cs->nfsflavor;	/* secmod != flavor for RPCSEC_GSS */
   6810 
   6811 	/*
   6812 	 * Set principal as per security flavor
   6813 	 */
   6814 	switch (p->cp_aflavor) {
   6815 	case AUTH_DES:
   6816 		p->cp_princ = kstrdup(cs->principal);
   6817 		break;
   6818 
   6819 	case RPCSEC_GSS:
   6820 		t = (caddr_t)rfs4_dup_princ((rpc_gss_principal_t)cs->principal);
   6821 		p->cp_princ = (caddr_t)t;
   6822 		break;
   6823 
   6824 	case AUTH_SYS:
   6825 	case AUTH_NONE:
   6826 	default:
   6827 		break;
   6828 	}
   6829 }
   6830 
   6831 /* returns 0 if no match; or 1 for a match */
   6832 int
   6833 rfs4_cmp_cred_princ(cred_princ_t *p, struct compound_state *cs)
   6834 {
   6835 	int			 rc = 0;
   6836 	rpc_gss_principal_t	 recp;		/* cached clnt princ */
   6837 	rpc_gss_principal_t	 ibrp;		/* inbound req princ */
   6838 
   6839 
   6840 	if (p == NULL)
   6841 		return (rc);	/* nothing to compare with */
   6842 
   6843 	if (p->cp_aflavor != cs->req->rq_cred.oa_flavor)
   6844 		return (rc);
   6845 
   6846 	if (p->cp_secmod != cs->nfsflavor)
   6847 		return (rc);
   6848 
   6849 	if (crcmp(p->cp_cr, cs->basecr))
   6850 		return (rc);
   6851 
   6852 	switch (p->cp_aflavor) {
   6853 	case AUTH_DES:
   6854 		rc = (strcmp(p->cp_princ, cs->principal) == 0);
   6855 		break;
   6856 
   6857 	case RPCSEC_GSS:
   6858 		recp = (rpc_gss_principal_t)p->cp_princ;
   6859 		ibrp = (rpc_gss_principal_t)cs->principal;
   6860 
   6861 		if (recp->len != ibrp->len)
   6862 			break;
   6863 		rc = (bcmp(recp->name, ibrp->name, ibrp->len) == 0);
   6864 		break;
   6865 
   6866 	case AUTH_SYS:
   6867 	case AUTH_NONE:
   6868 	default:
   6869 		rc = 1;
   6870 		break;
   6871 	}
   6872 	return (rc);
   6873 }
   6874 
   6875 /* { co_ownerid, co_verifier, principal, clientid, confirmed } */
   6876 rfs4_client_t *
   6877 client_record(nfs_client_id4 *cip, struct compound_state *cs)
   6878 {
   6879 	rfs4_client_t	*cp;
   6880 	bool_t		 create = TRUE;
   6881 
   6882 	/*
   6883 	 * 1. co_ownerid
   6884 	 * 2. co_verifier
   6885 	 */
   6886 	cp = findclient(cs->instp, cip, &create, NULL);
   6887 
   6888 	/* 3. principal */
   6889 	if (cp != NULL)
   6890 		rfs4_set_cred_princ(&cp->rc_cr_set, cs);
   6891 
   6892 	/*
   6893 	 * Both of the following items of the 5-tuple are built as
   6894 	 * part of creating the rfs4_client_t.
   6895 	 *
   6896 	 * 4. clientid;		created as part of findclient()
   6897 	 * 5. confirmed;	cp->need_confirmed is initialized to TRUE
   6898 	 */
   6899 	return (cp);
   6900 }
   6901 
   6902 rfs4_client_t *
   6903 client_lookup(nfs_client_id4 *cip, struct compound_state *cs)
   6904 {
   6905 	bool_t	create = FALSE;
   6906 
   6907 	return (findclient(cs->instp, cip, &create, NULL));
   6908 }
   6909 
   6910 bool_t
   6911 nfs_clid4_cmp(nfs_client_id4 *s1, nfs_client_id4 *s2)
   6912 {
   6913 	if (s1->verifier != s2->verifier)
   6914 		return (FALSE);
   6915 	if (bcmp(s1->id_val, s2->id_val, s2->id_len))
   6916 		return (FALSE);
   6917 	return (TRUE);
   6918 }
   6919 
   6920 /*
   6921  * Compute the "use bits", i.e. the flags specifying the permissible
   6922  * regular, MDS, and data server ops for the returned clientid.
   6923  *
   6924  * The minorversion1 specification allows a server implementor two
   6925  * alternatives: allow PNFS_MDS and PNFS_DS on the same clientid, or
   6926  * force the client to create separate clientids to distinguish
   6927  * MDS versus DS operations.
   6928  *
   6929  * Our design distinguishes operations based upon filehandle, and thus
   6930  * there is no reason to force the client to create separate clientids.
   6931  * Thus, we give the client as much as possible, while keeping the result
   6932  * within the allowed combinations as specified in the specification.
   6933  *
   6934  * Our constraints are: use a subset of the client's request, unless
   6935  * the client requested nothing, in which case we may return any
   6936  * legal combination; and, the combination of NON_PNFS and PNFS_MDS
   6937  * may not both be set in the results.  These constraints are reflected
   6938  * in the ASSERT()s at the end.
   6939  */
   6940 
   6941 static uint32_t
   6942 compute_use_pnfs_flags(uint32_t request)
   6943 {
   6944 	uint32_t rc;
   6945 
   6946 	/* Start with the client's initial request */
   6947 	rc = request & EXCHGID4_FLAG_MASK_PNFS;
   6948 
   6949 	/* If the client requested nothing, return the most permissive. */
   6950 	if (rc == 0) {
   6951 		rc = (EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS);
   6952 		goto done;
   6953 	}
   6954 
   6955 	/* Don't permit the illegal combination of MDS and NON_PNFS */
   6956 	if ((rc &
   6957 	    (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS)) ==
   6958 	    (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))
   6959 		rc &= ~EXCHGID4_FLAG_USE_NON_PNFS;
   6960 
   6961 done:
   6962 	ASSERT(((request & EXCHGID4_FLAG_MASK_PNFS) == 0) ||
   6963 	    ((rc & ~(request & EXCHGID4_FLAG_MASK_PNFS)) == 0));
   6964 	ASSERT((rc & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))
   6965 	    != (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS));
   6966 	ASSERT(rc != 0);
   6967 
   6968 	return (rc);
   6969 }
   6970 
   6971 /*
   6972  * Session Trunking Support
   6973  */
   6974 static struct netbuf *
   6975 netbuf_dup(struct netbuf *obp)
   6976 {
   6977 	struct netbuf	*np = NULL;
   6978 
   6979 	np = (struct netbuf *)kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
   6980 	np->maxlen = np->len = obp->len;
   6981 	np->buf = (char *)kmem_zalloc(obp->len, KM_SLEEP);
   6982 	bcopy(obp->buf, np->buf, obp->len);
   6983 
   6984 	return (np);
   6985 }
   6986 
   6987 static void
   6988 netbuf_destroy(struct netbuf *np)
   6989 {
   6990 	kmem_free((char *)np->buf, np->len);
   6991 	kmem_free((struct netbuf *)np, sizeof (struct netbuf));
   6992 }
   6993 
   6994 static t_scalar_t
   6995 svc_get_type(SVCXPRT *xprt)
   6996 {
   6997 	t_scalar_t	xtype;
   6998 
   6999 	xtype = svc_gettype(xprt);
   7000 	switch (xtype) {
   7001 	case T_RDMA:
   7002 		break;
   7003 
   7004 	case T_COTS:
   7005 	case T_COTS_ORD:
   7006 		xtype = T_COTS_ORD;
   7007 		break;
   7008 
   7009 	case T_CLTS:
   7010 	default:
   7011 		cmn_err(CE_WARN, "svc_get_type: Bad service type %d\n", xtype);
   7012 		xtype = 0;
   7013 	}
   7014 	return (xtype);
   7015 }
   7016 
   7017 static rfs41_tie_t *
   7018 rfs41_tie_init(SVCXPRT *xprt)
   7019 {
   7020 	rfs41_tie_t		*tip = NULL;
   7021 	struct sockaddr		*sa;
   7022 	struct sockaddr_in	*sa4;
   7023 	struct sockaddr_in6	*sa6;
   7024 
   7025 	tip = kmem_zalloc(sizeof (rfs41_tie_t), KM_SLEEP);
   7026 
   7027 	sa = (struct sockaddr *)svc_getendpoint(xprt);
   7028 	tip->t_famly = sa->sa_family;
   7029 	tip->t_xtype = svc_get_type(xprt);
   7030 	tip->t_netbf = netbuf_dup(svc_getlocaladdr(xprt));
   7031 
   7032 	switch (tip->t_famly) {
   7033 	case AF_INET:
   7034 		sa4 = (struct sockaddr_in *)(tip->t_netbf->buf);
   7035 		bcopy(&sa4->sin_addr, &tip->t_ipaddr_u.ip4,
   7036 		    sizeof (struct in_addr));
   7037 		break;
   7038 
   7039 	case AF_INET6:
   7040 		sa6 = (struct sockaddr_in6 *)(tip->t_netbf->buf);
   7041 		bcopy(&sa6->sin6_addr, &tip->t_ipaddr_u.ip6,
   7042 		    sizeof (struct in6_addr));
   7043 		break;
   7044 
   7045 	default:
   7046 		cmn_err(CE_WARN, "rfs41_tie_init: Bad family (%d)\n",
   7047 		    tip->t_famly);
   7048 		netbuf_destroy(tip->t_netbf);
   7049 		kmem_free(tip, sizeof (rfs41_tie_t));
   7050 		tip = NULL;
   7051 		break;
   7052 	}
   7053 	return (tip);
   7054 }
   7055 
   7056 static void
   7057 rfs41_exid_so_major(struct server_owner4 *sop, struct compound_state *cs)
   7058 {
   7059 	int len = sizeof (void *) / sizeof (char);
   7060 
   7061 	sop->so_major_id.so_major_id_len = (len * 2) + 1;
   7062 	sop->so_major_id.so_major_id_val = tohex(cs->instp, len);
   7063 }
   7064 
   7065 /*
   7066  * XXX - rfs4_srv_trunk_test is disabled by default; enabling it will
   7067  *	 cause ip_dump() to spew addresses of inbound EXCHANGE_ID's
   7068  *	 to the console. rfs4_srv_trunk_test and ip_dump() will go
   7069  *	 away after client trunking is done. This is handy info to
   7070  *	 have for debugging.
   7071  */
   7072 int	rfs4_srv_trunk_test = 0;
   7073 
   7074 static void
   7075 ip_dump(struct netbuf *np, char *msg)
   7076 {
   7077 	struct sockaddr_in	*sa4;
   7078 	struct sockaddr_in6	*sa6;
   7079 
   7080 	if (np == NULL || np->buf == NULL || !rfs4_srv_trunk_test)
   7081 		return;
   7082 
   7083 	sa4 = (struct sockaddr_in *)(np->buf);
   7084 	switch (sa4->sin_family) {
   7085 	case AF_INET:
   7086 		cmn_err(CE_WARN, "\n%s ip: %d.%d.%d.%d", msg,
   7087 		    sa4->sin_addr.S_un.S_un_b.s_b1,
   7088 		    sa4->sin_addr.S_un.S_un_b.s_b2,
   7089 		    sa4->sin_addr.S_un.S_un_b.s_b3,
   7090 		    sa4->sin_addr.S_un.S_un_b.s_b4);
   7091 		break;
   7092 
   7093 	case AF_INET6:
   7094 		sa6 = (struct sockaddr_in6 *)(np->buf);
   7095 		cmn_err(CE_WARN, "\n%s ip6: "
   7096 		    "%2x%2x:%0x%0x:%0x%0x:%0x%0x:%2x%2x:%2x%2x:%2x%2x:%2x%2x",
   7097 		    msg,
   7098 		    sa6->sin6_addr._S6_un._S6_u8[0],
   7099 		    sa6->sin6_addr._S6_un._S6_u8[1],
   7100 		    sa6->sin6_addr._S6_un._S6_u8[2],
   7101 		    sa6->sin6_addr._S6_un._S6_u8[3],
   7102 		    sa6->sin6_addr._S6_un._S6_u8[4],
   7103 		    sa6->sin6_addr._S6_un._S6_u8[5],
   7104 		    sa6->sin6_addr._S6_un._S6_u8[6],
   7105 		    sa6->sin6_addr._S6_un._S6_u8[7],
   7106 		    sa6->sin6_addr._S6_un._S6_u8[8],
   7107 		    sa6->sin6_addr._S6_un._S6_u8[9],
   7108 		    sa6->sin6_addr._S6_un._S6_u8[10],
   7109 		    sa6->sin6_addr._S6_un._S6_u8[11],
   7110 		    sa6->sin6_addr._S6_un._S6_u8[12],
   7111 		    sa6->sin6_addr._S6_un._S6_u8[13],
   7112 		    sa6->sin6_addr._S6_un._S6_u8[14],
   7113 		    sa6->sin6_addr._S6_un._S6_u8[15]);
   7114 		break;
   7115 
   7116 	default:
   7117 		cmn_err(CE_WARN, "%s <cannot translate ip>", msg);
   7118 		break;
   7119 	}
   7120 }
   7121 
   7122 static int
   7123 ip_addr_cmp(rfs41_tie_t *tip, rfs41_tie_t *p)
   7124 {
   7125 	int	match = 0;
   7126 
   7127 	ASSERT(tip != NULL);
   7128 	ASSERT(p != NULL);
   7129 
   7130 	if (tip->t_famly != p->t_famly)
   7131 		return (0);
   7132 
   7133 	if (tip->t_famly == AF_INET) {
   7134 		if (bcmp(&tip->t_ipaddr_u.ip4, &p->t_ipaddr_u.ip4,
   7135 		    sizeof (struct in_addr)) == 0) {
   7136 			match = 1;			/* IPv4 addr match */
   7137 		}
   7138 	} else if (tip->t_famly == AF_INET6) {
   7139 		if (bcmp(&tip->t_ipaddr_u.ip6, &p->t_ipaddr_u.ip6,
   7140 		    sizeof (struct in6_addr)) == 0) {
   7141 			match = 1;			/* IPv6 addr match */
   7142 		}
   7143 	}
   7144 
   7145 	return (match);
   7146 }
   7147 
   7148 static void
   7149 rfs41_set_trunkinfo(SVCXPRT *xprt, struct compound_state *cs, rfs4_client_t *cp,
   7150     EXCHANGE_ID4resok *rok)
   7151 {
   7152 	rfs41_tie_t	*tip;
   7153 	rfs41_tie_t	*p;
   7154 
   7155 	ASSERT(cs != NULL && cp != NULL && rok != NULL);
   7156 	if (cs == NULL || cp == NULL || rok == NULL)
   7157 		return;
   7158 
   7159 	/*
   7160 	 * start out w/some sane defaults
   7161 	 * XXX - scope needs to be revisited.
   7162 	 */
   7163 	rok->eir_clientid = cp->rc_clientid;
   7164 	rfs41_exid_so_major(&rok->eir_server_owner, cs);
   7165 
   7166 	ip_dump(svc_getlocaladdr(xprt), "inbound");
   7167 
   7168 	/* build trunkinfo entry */
   7169 	if (xprt == NULL || (tip = rfs41_tie_init(xprt)) == NULL)
   7170 		return;
   7171 
   7172 	/* fastpath for 1st exid */
   7173 	rfs4_dbe_lock(cp->rc_dbe);
   7174 	if (list_is_empty(&cp->rc_trunkinfo)) {
   7175 		list_insert_head(&cp->rc_trunkinfo, tip);
   7176 		ip_dump(tip->t_netbf, "first-in-list");
   7177 		rok->eir_server_owner.so_minor_id =
   7178 		    (uint64_t)(uintptr_t)&cp->rc_trunkinfo;
   7179 		rfs4_dbe_unlock(cp->rc_dbe);
   7180 		return;
   7181 	}
   7182 
   7183 	/* run thru trunkinfo list to see if IP has been seen */
   7184 	for (p = list_head(&cp->rc_trunkinfo); p != NULL;
   7185 	    p = list_next(&cp->rc_trunkinfo, p)) {
   7186 
   7187 		/*
   7188 		 * Is the IP already in list ?
   7189 		 */
   7190 		if (ip_addr_cmp(tip, p)) {
   7191 			ip_dump(p->t_netbf, "already-in-list");
   7192 			rok->eir_server_owner.so_minor_id =
   7193 			    (uint64_t)(uintptr_t)&cp->rc_trunkinfo;
   7194 			rfs4_dbe_unlock(cp->rc_dbe);
   7195 			return;
   7196 		}
   7197 	}
   7198 
   7199 	/* IP hasn't been seen; rerun list to see if equivalent exists */
   7200 	for (p = list_head(&cp->rc_trunkinfo); p != NULL;
   7201 	    p = list_next(&cp->rc_trunkinfo, p)) {
   7202 
   7203 		/*
   7204 		 * Do we have an equivalent (ie. transport) entry
   7205 		 */
   7206 		if (p->t_xtype == tip->t_xtype) {
   7207 			list_insert_head(&cp->rc_trunkinfo, tip);
   7208 			ip_dump(tip->t_netbf, "Equiv FOUND: inserted-in-list");
   7209 			rok->eir_server_owner.so_minor_id =
   7210 			    (uint64_t)(uintptr_t)&cp->rc_trunkinfo;
   7211 			rfs4_dbe_unlock(cp->rc_dbe);
   7212 			return;
   7213 		}
   7214 	}
   7215 
   7216 	/* nothing in list has same IP addr or is equivalent to tip */
   7217 	list_insert_head(&cp->rc_trunkinfo, tip);
   7218 	ip_dump(tip->t_netbf, "No IP or Equiv FOUND: inserted-in-list");
   7219 	rfs4_dbe_unlock(cp->rc_dbe);
   7220 	rok->eir_server_owner.so_minor_id = (uint64_t)(uintptr_t)&tip;
   7221 }
   7222 
   7223 void
   7224 mds_clean_up_trunkinfo(rfs4_client_t *cp)
   7225 {
   7226 	rfs41_tie_t	*p;
   7227 
   7228 	ASSERT(cp != NULL);
   7229 	if (cp == NULL)
   7230 		return;
   7231 
   7232 	rfs4_dbe_lock(cp->rc_dbe);
   7233 	while (p = list_remove_head(&cp->rc_trunkinfo)) {
   7234 		netbuf_destroy(p->t_netbf);
   7235 		kmem_free(p, sizeof (rfs41_tie_t));
   7236 	}
   7237 	list_destroy(&cp->rc_trunkinfo);
   7238 	rfs4_dbe_unlock(cp->rc_dbe);
   7239 }
   7240 
   7241 /*ARGSUSED*/
   7242 static void
   7243 mds_op_exid_free(nfs_resop4 *resop, compound_state_t *cs)
   7244 {
   7245 	EXCHANGE_ID4res		*resp = &resop->nfs_resop4_u.opexchange_id;
   7246 	EXCHANGE_ID4resok	*rok = &resp->EXCHANGE_ID4res_u.eir_resok4;
   7247 	struct server_owner4	*sop = &rok->eir_server_owner;
   7248 	nfs_impl_id4		*nip;
   7249 	int			 len = 0;
   7250 
   7251 	/* Server Owner: major */
   7252 	if ((len = sop->so_major_id.so_major_id_len) != 0)
   7253 		kmem_free(sop->so_major_id.so_major_id_val, len);
   7254 
   7255 	if ((nip = rok->eir_server_impl_id.eir_server_impl_id_val) != NULL) {
   7256 		/* Immplementation */
   7257 		len = nip->nii_name.utf8string_len;
   7258 		kmem_free(nip->nii_name.utf8string_val, len * sizeof (char));
   7259 
   7260 		/* Domain */
   7261 		len = nip->nii_domain.utf8string_len;
   7262 		kmem_free(nip->nii_domain.utf8string_val, len * sizeof (char));
   7263 
   7264 		/* Server Impl */
   7265 		kmem_free(nip, sizeof (nfs_impl_id4));
   7266 	}
   7267 }
   7268 
   7269 /* XXX - NOTE: EXCHANGE_ID conforms to draft-19 behavior */
   7270 
   7271 /*ARGSUSED*/
   7272 void
   7273 mds_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
   7274     struct svc_req *req, compound_state_t *cs)
   7275 {
   7276 	EXCHANGE_ID4args	*args = &argop->nfs_argop4_u.opexchange_id;
   7277 	EXCHANGE_ID4res		*resp = &resop->nfs_resop4_u.opexchange_id;
   7278 	EXCHANGE_ID4resok	*rok = &resp->EXCHANGE_ID4res_u.eir_resok4;
   7279 	rfs4_client_t		*cp;
   7280 	rfs4_client_t		*ocp;
   7281 	bool_t			 update;
   7282 	client_owner4		*cop;
   7283 	nfs_client_id4		*cip;
   7284 	verifier4		 old_verifier_arg;
   7285 
   7286 	DTRACE_NFSV4_2(op__exchange__id__start,
   7287 	    struct compound_state *, cs,
   7288 	    EXCHANGE_ID4args *, args);
   7289 
   7290 	/*
   7291 	 * EXCHANGE_ID's may be preceded by SEQUENCE
   7292 	 *
   7293 	 * Check that eia_flags only has "valid" spec bits
   7294 	 * and that no 'eir_flag' ONLY bits are specified.
   7295 	 */
   7296 	if (args->eia_flags & ~EXID4_FLAG_MASK ||
   7297 	    args->eia_flags & EXID4_FLAG_INVALID_ARGS) {
   7298 		*cs->statusp = resp->eir_status = NFS4ERR_INVAL;
   7299 		goto final;
   7300 	}
   7301 
   7302 	update = (args->eia_flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A);
   7303 	cop = &args->eia_clientowner;
   7304 	cip = (nfs_client_id4 *)cop;
   7305 
   7306 	/*
   7307 	 * Refer to Section 18.35.4 of draft 19
   7308 	 */
   7309 	cp = client_lookup(cip, cs);
   7310 	if (cp == NULL) {		/* no record exists */
   7311 		if (!update) {
   7312 case1:			/* case 1 - utok */
   7313 			cp = client_record(cip, cs);
   7314 			ASSERT(cp != NULL);
   7315 			*cs->statusp = resp->eir_status = NFS4_OK;
   7316 			rok->eir_clientid = cp->rc_clientid;
   7317 			rok->eir_sequenceid = cp->rc_contrived.xi_sid;
   7318 			goto out;
   7319 		}
   7320 		/* no record and trying to update */
   7321 		*cs->statusp = resp->eir_status = NFS4ERR_NOENT;
   7322 		goto final;
   7323 	}
   7324 
   7325 	/* record exists */
   7326 	old_verifier_arg = cp->rc_nfs_client.verifier;
   7327 	if (CLID_REC_CONFIRMED(cp)) {
   7328 		if (!update) {
   7329 			if (!rfs4_cmp_cred_princ(cp->rc_cr_set, cs)) {
   7330 				/* case 3 */
   7331 				if (rfs4_lease_expired(cp)) {
   7332 					rfs4_client_close(cp);
   7333 					goto case1;
   7334 				}
   7335 				/*
   7336 				 * case 3: clid_in_use - utok
   7337 				 * old_client_ret has unexpired lease w/state.
   7338 				 */
   7339 				*cs->statusp = NFS4ERR_CLID_INUSE;
   7340 				resp->eir_status = NFS4ERR_CLID_INUSE;
   7341 				rfs4_client_rele(cp);
   7342 				goto final;
   7343 
   7344 			} else if (nfs_clid4_cmp(&cp->rc_nfs_client, cip)) {
   7345 				/* case 2 - utok */
   7346 				*cs->statusp = NFS4_OK;
   7347 				resp->eir_status = NFS4_OK;
   7348 				rok->eir_clientid = cp->rc_clientid;
   7349 				rok->eir_sequenceid = cp->rc_contrived.xi_sid;
   7350 				/* trickle down to "out" */
   7351 
   7352 			} else if (old_verifier_arg != cip->verifier) {
   7353 				/* case 5 - utok */
   7354 				/*
   7355 				 * previous incarnation of clientid is first
   7356 				 * hidden such that any subsequent lookups
   7357 				 * will not find it in DB, then the current
   7358 				 * reference to it is dropped; this will
   7359 				 * force the reaper thread to clean it up.
   7360 				 */
   7361 				ocp = cp;
   7362 				mds_clean_up_sessions(ocp);
   7363 				rfs4_dbe_hide(ocp->rc_dbe);
   7364 				rfs4_client_rele(ocp);
   7365 
   7366 				cp = client_record(cip, cs);
   7367 				ASSERT(cp != NULL);
   7368 				*cs->statusp = resp->eir_status = NFS4_OK;
   7369 				rok->eir_clientid = cp->rc_clientid;
   7370 				rok->