Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/systm.h>
     34 #include <sys/cred.h>
     35 #include <sys/buf.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vnode.h>
     38 #include <sys/uio.h>
     39 #include <sys/errno.h>
     40 #include <sys/sysmacros.h>
     41 #include <sys/statvfs.h>
     42 #include <sys/kmem.h>
     43 #include <sys/dirent.h>
     44 #include <sys/cmn_err.h>
     45 #include <sys/disp.h>
     46 #include <sys/debug.h>
     47 #include <sys/systeminfo.h>
     48 #include <sys/flock.h>
     49 #include <sys/pathname.h>
     50 #include <sys/nbmlock.h>
     51 #include <sys/share.h>
     52 #include <sys/atomic.h>
     53 #include <sys/policy.h>
     54 #include <sys/fem.h>
     55 #include <sys/sdt.h>
     56 #include <sys/ddi.h>
     57 #include <sys/modctl.h>
     58 #include <sys/timod.h>
     59 #include <sys/id_space.h>
     60 
     61 #include <rpc/types.h>
     62 #include <rpc/auth.h>
     63 #include <rpc/rpcsec_gss.h>
     64 #include <rpc/svc.h>
     65 
     66 #include <nfs/nfs.h>
     67 #include <nfs/export.h>
     68 #include <nfs/lm.h>
     69 #include <nfs/nfs4.h>
     70 
     71 #include <sys/strsubr.h>
     72 #include <sys/strsun.h>
     73 
     74 #include <inet/common.h>
     75 #include <inet/ip.h>
     76 #include <inet/ip6.h>
     77 
     78 #include <sys/tsol/label.h>
     79 #include <sys/tsol/tndb.h>
     80 
     81 #include <nfs/nfs4_attrmap.h>
     82 #include <nfs/nfs4_srv_attr.h>
     83 #include <nfs/mds_state.h>
     84 #include <nfs/mds_odl.h>
     85 
     86 #include <nfs/nfs41_filehandle.h>
     87 #include <nfs/ctl_mds_clnt.h>
     88 
     89 #include <nfs/spe_impl.h>
     90 
     91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
     92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
     93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
     94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
     95 
     96 int mds_strict_seqid = 0;
     97 
     98 static void ping_cb_null_thr(mds_session_t *);
     99 
    100 /* End of Tunables */
    101 
    102 /*
    103  * Used to bump the stateid4.seqid value and show changes in the stateid
    104  */
    105 #define	next_stateid(sp) (++(sp)->v41_bits.chgseq)
    106 
    107 /*
    108  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
    109  *	This is used to return NFS4ERR_TOOSMALL when clients specify
    110  *	maxcount that isn't large enough to hold the smallest possible
    111  *	XDR encoded dirent.
    112  *
    113  *	    sizeof cookie (8 bytes) +
    114  *	    sizeof name_len (4 bytes) +
    115  *	    sizeof smallest (padded) name (4 bytes) +
    116  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
    117  *	    sizeof attrlist4_len (4 bytes) +
    118  *	    sizeof next boolean (4 bytes)
    119  *
    120  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
    121  * the smallest possible entry4 (assumes no attrs requested).
    122  *	sizeof nfsstat4 (4 bytes) +
    123  *	sizeof verifier4 (8 bytes) +
    124  *	sizeof entry4list bool (4 bytes) +
    125  *	sizeof entry4 	(36 bytes) +
    126  *	sizeof eof bool  (4 bytes)
    127  *
    128  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
    129  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
    130  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
    131  *	required for a given name length.  MAXNAMELEN is the maximum
    132  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
    133  *	macros are to allow for . and .. entries -- just a minor tweak to try
    134  *	and guarantee that buffer we give to VOP_READDIR will be large enough
    135  *	to hold ., .., and the largest possible solaris dirent64.
    136  */
    137 #define	RFS4_MINLEN_ENTRY4 36
    138 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
    139 #define	RFS4_MINLEN_RDDIR_BUF \
    140 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
    141 
    142 /*
    143  * It would be better to pad to 4 bytes since that's what XDR would do,
    144  * but the dirents UFS gives us are already padded to 8, so just take
    145  * what we're given.  Dircount is only a hint anyway.  Currently the
    146  * solaris kernel is ASCII only, so there's no point in calling the
    147  * UTF8 functions.
    148  *
     149  * dirent64: name padded to provide 8 byte struct alignment
    150  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
    151  *
    152  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
    153  *
    154  */
    155 #define	DIRENT64_TO_DIRCOUNT(dp) \
    156 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
    157 
    158 /*
    159  * types of label comparison
    160  */
    161 #define	EQUALITY_CHECK	0
    162 #define	DOMINANCE_CHECK	1
    163 
    164 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
    165 
    166 void		rfs4_init_compound_state(struct compound_state *);
    167 
    168 static void	nullfree(nfs_resop4 *, compound_state_t *);
    169 static void	mds_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    170 			compound_state_t *);
    171 static void	mds_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    172 			compound_state_t *);
    173 static void	mds_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    174 			compound_state_t *);
    175 static void	mds_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    176 			compound_state_t *);
    177 static void	mds_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    178 			compound_state_t *);
    179 static void	mds_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    180 			compound_state_t *);
    181 static void	mds_op_create_free(nfs_resop4 *resop);
    182 static void	mds_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    183 				struct svc_req *, compound_state_t *);
    184 static void	mds_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    185 			compound_state_t *);
    186 static void	mds_op_getattr_free(nfs_resop4 *, compound_state_t *);
    187 static void	mds_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    188 			compound_state_t *);
    189 static void	mds_op_getfh_free(nfs_resop4 *, compound_state_t *);
    190 static void	mds_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    191 			compound_state_t *);
    192 static void	mds_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    193 			compound_state_t *);
    194 static void	mds_lock_denied_free(nfs_resop4 *, compound_state_t *);
    195 static void	mds_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    196 			compound_state_t *);
    197 static void	mds_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    198 			compound_state_t *);
    199 static void	mds_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    200 			compound_state_t *);
    201 static void	mds_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    202 			compound_state_t *);
    203 static void	mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    204 				struct svc_req *req, compound_state_t *);
    205 static void	mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    206 			compound_state_t *);
    207 static void	mds_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    208 			compound_state_t *);
    209 static void	mds_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    210 			struct svc_req *, compound_state_t *);
    211 static void	mds_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    212 			compound_state_t *);
    213 static void	mds_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    214 			compound_state_t *);
    215 static void	mds_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    216 			compound_state_t *);
    217 static void	mds_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    218 			compound_state_t *);
    219 static void	mds_op_read_free(nfs_resop4 *, compound_state_t *);
    220 void		mds_op_readdir(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    221 			compound_state_t *);
    222 static void	mds_op_readdir_free(nfs_resop4 *, compound_state_t *);
    223 static void	mds_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    224 			compound_state_t *);
    225 static void	mds_op_readlink_free(nfs_resop4 *, compound_state_t *);
    226 static void	mds_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    227 			struct svc_req *, compound_state_t *);
    228 static void	mds_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    229 			compound_state_t *);
    230 static void	mds_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    231 			compound_state_t *);
    232 static void	mds_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    233 			compound_state_t *);
    234 static void	mds_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    235 			compound_state_t *);
    236 static void	mds_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    237 			compound_state_t *);
    238 static void	mds_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    239 			compound_state_t *);
    240 static void	mds_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    241 			compound_state_t *);
    242 static void	mds_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    243 			compound_state_t *);
    244 static void	mds_op_exchange_id(nfs_argop4 *, nfs_resop4 *,
    245 			struct svc_req *, compound_state_t *);
    246 static void	mds_op_exid_free(nfs_resop4 *, compound_state_t *);
    247 static void	mds_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    248 			compound_state_t *);
    249 static void	mds_op_secinfonn(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    250 			compound_state_t *);
    251 nfsstat4	do_rfs4_op_secinfo(struct compound_state *, char *, int,
    252     SECINFO4res *);
    253 
    254 static void	mds_op_secinfo_free(nfs_resop4 *, compound_state_t *);
    255 
    256 static void	mds_op_backchannel_ctl(nfs_argop4 *, nfs_resop4 *,
    257 			struct svc_req *, compound_state_t *);
    258 static void	mds_op_bind_conn_to_session(nfs_argop4 *, nfs_resop4 *,
    259 			struct svc_req *, compound_state_t *);
    260 static void	mds_op_create_clientid(nfs_argop4 *, nfs_resop4 *,
    261 			struct svc_req *, compound_state_t *);
    262 static void	mds_op_create_session(nfs_argop4 *, nfs_resop4 *,
    263 			struct svc_req *, compound_state_t *);
    264 static void	mds_op_destroy_session(nfs_argop4 *, nfs_resop4 *,
    265 			struct svc_req *, compound_state_t *);
    266 static void	mds_op_sequence(nfs_argop4 *, nfs_resop4 *,
    267 			struct svc_req *, compound_state_t *);
    268 
    269 static void mds_op_get_devlist(nfs_argop4 *, nfs_resop4 *,
    270 		struct svc_req *, compound_state_t *);
    271 
    272 static void mds_op_get_devinfo(nfs_argop4 *, nfs_resop4 *,
    273 		struct svc_req *, compound_state_t *);
    274 
    275 static void mds_op_layout_get(nfs_argop4 *, nfs_resop4 *,
    276 		struct svc_req *, compound_state_t *);
    277 static void mds_op_layout_get_free(nfs_resop4 *, compound_state_t *);
    278 
    279 static void mds_op_layout_commit(nfs_argop4 *, nfs_resop4 *,
    280 		struct svc_req *, compound_state_t *);
    281 
    282 static void mds_op_layout_return(nfs_argop4 *, nfs_resop4 *,
    283 		struct svc_req *, compound_state_t *);
    284 
    285 static void mds_op_reclaim_complete(nfs_argop4 *, nfs_resop4 *,
    286     struct svc_req *, compound_state_t *);
    287 
    288 static int	seq_chk_limits(nfs_argop4 *, nfs_resop4 *, compound_state_t *);
    289 
    290 nfsstat4 check_open_access(uint32_t,
    291 			struct compound_state *, struct svc_req *);
    292 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
    293 
    294 static void	mds_free_reply(nfs_resop4 *, compound_state_t *);
    295 
    296 vnode_t *do_rfs4_op_mknod(CREATE4args *, CREATE4res *, struct svc_req *,
    297 			struct compound_state *, vattr_t *, char *);
    298 
    299 nfsstat4 rfs4_do_lock(rfs4_lo_state_t *, nfs_lock_type4, seqid4,
    300 		offset4, length4, cred_t *, nfs_resop4 *);
    301 
    302 rfs4_lo_state_t *mds_findlo_state_by_owner(rfs4_lockowner_t *,
    303 	    rfs4_state_t *, bool_t *);
    304 
    305 bool_t in_flavor_list(int, int *, int);
    306 
    307 nfsstat4 attrmap4_to_vattrmask(attrmap4 *, struct nfs4_svgetit_arg *);
    308 
    309 nfsstat4 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *);
    310 
    311 nfsstat4 do_rfs4_op_getattr(attrmap4 *, fattr4 *, struct nfs4_svgetit_arg *);
    312 
    313 nfsstat4 do_rfs4_op_lookup(char *, uint_t, struct svc_req *,
    314 		struct compound_state *);
    315 
    316 rfs4_lockowner_t *mds_findlockowner_by_pid(nfs_server_instance_t *, pid_t);
    317 
    318 mds_session_t *mds_findsession_by_id(nfs_server_instance_t *, sessionid4);
    319 
    320 rfs4_openowner_t *mds_findopenowner(nfs_server_instance_t *, open_owner4 *,
    321     bool_t *);
    322 
    323 static void	mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    324 			compound_state_t *);
    325 
    326 extern mds_mpd_t *mds_find_mpd(nfs_server_instance_t *, id_t);
    327 extern void rfs41_lo_seqid(stateid_t *);
    328 extern void mds_delete_layout(vnode_t *);
    329 extern void mds_clean_grants_by_fsid(rfs4_client_t *, vnode_t *);
    330 extern mds_layout_t *mds_add_layout(layout_core_t *lc);
    331 
    332 nfsstat4
    333 create_vnode(vnode_t *, char *,  vattr_t *, createmode4, timespec32_t *,
    334     cred_t *, vnode_t **, bool_t *);
    335 
    336 
    337 /* HACKERY */
    338 nfsstat4 rfs4_get_all_state(struct compound_state *, stateid4 *,
    339     rfs4_state_t **, rfs4_deleg_state_t **, rfs4_lo_state_t **);
    340 
    341 void rfs4_ss_clid(struct compound_state *, rfs4_client_t *, struct svc_req *);
    342 void rfs4_ss_chkclid(struct compound_state *, rfs4_client_t *);
    343 
    344 int layout_match(stateid_t, stateid4, nfsstat4 *);
    345 
    346 extern stateid4 special0;
    347 extern stateid4 special1;
    348 
    349 #define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
    350 			stateid4_cmp(id, &special1))
    351 
    352 void rfs4_cn_release(compound_state_t *);
    353 
    354 mds_layout_grant_t *rfs41_findlogrant(struct compound_state *,
    355     rfs4_file_t *, rfs4_client_t *, bool_t *);
    356 void rfs41_lo_grant_rele(mds_layout_grant_t *);
    357 mds_ever_grant_t *rfs41_findevergrant(rfs4_client_t *, vnode_t *, bool_t *);
    358 void rfs41_ever_grant_rele(mds_ever_grant_t *);
    359 
    360 static uint32_t compute_use_pnfs_flags(uint32_t);
    361 
    362 /* ARGSUSED */
    363 static void
    364 mds_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    365 	compound_state_t *cs)
    366 {
    367 	DTRACE_NFSV4_1(op__notsup__start,
    368 	    strcut compound_state *, cs);
    369 
    370 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
    371 
    372 	DTRACE_NFSV4_1(op__notsup__done,
    373 	    struct compound_state *, cs);
    374 }
    375 
    376 /* ARGSUSED */
    377 static void
    378 mds_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    379 	compound_state_t *cs)
    380 {
    381 	DTRACE_NFSV4_1(op__illegal__start,
    382 	    struct compound_state *, cs);
    383 
    384 	*cs->statusp =
    385 	    *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_OP_ILLEGAL;
    386 
    387 	DTRACE_NFSV4_1(op__illegal__done,
    388 	    struct compound_state *, cs);
    389 }
    390 
    391 /* ARGSUSED */
    392 static void
    393 mds_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    394 	compound_state_t *cs)
    395 {
    396 	DTRACE_NFSV4_1(op__inval__start,
    397 	    struct compound_state *, cs);
    398 
    399 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
    400 
    401 	DTRACE_NFSV4_1(op__inval__done,
    402 	    struct compound_state *, cs);
    403 }
    404 
    405 /*ARGSUSED*/
    406 static void
    407 nullfree(nfs_resop4 *resop, compound_state_t *cs)
    408 {
    409 }
    410 
    411 static op_disp_tbl_t mds_disptab[] = {
    412 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 0"},
    413 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 1"},
    414 	{mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 2"},
    415 	{mds_op_access, nullfree, DISP_OP_MDS, "ACCESS"},
    416 	{mds_op_close, nullfree, DISP_OP_MDS, "CLOSE"},
    417 	{mds_op_commit, nullfree, DISP_OP_BOTH, "COMMIT"},
    418 	{mds_op_create, nullfree, DISP_OP_MDS, "CREATE"},
    419 	{mds_op_inval, nullfree, DISP_OP_BAD, "BAD Op 7"},
    420 	{mds_op_delegreturn, nullfree, DISP_OP_MDS, "DELEGRETURN"},
    421 	{mds_op_getattr, mds_op_getattr_free, DISP_OP_MDS, "GETATTR"},
    422 	{mds_op_getfh, mds_op_getfh_free, DISP_OP_MDS, "GETFH"},
    423 	{mds_op_link, nullfree, DISP_OP_MDS, "LINK"},
    424 	{mds_op_lock, mds_lock_denied_free, DISP_OP_MDS, "LOCK"},
    425 	{mds_op_lockt, mds_lock_denied_free,  DISP_OP_MDS, "LOCKT"},
    426 	{mds_op_locku, nullfree,  DISP_OP_MDS, "LOCKU"},
    427 	{mds_op_lookup, nullfree,  DISP_OP_MDS, "LOOKUP"},
    428 	{mds_op_lookupp, nullfree,  DISP_OP_MDS, "LOOKUPP"},
    429 	{mds_op_nverify, nullfree,  DISP_OP_MDS, "NVERIFY"},
    430 	{mds_op_open, mds_free_reply,  DISP_OP_MDS, "OPEN"},
    431 	{mds_op_openattr, nullfree,  DISP_OP_MDS, "OPENATTR"},
    432 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 20"},
    433 	{mds_op_open_downgrade, nullfree,  DISP_OP_MDS, "OPEN_DOWNGRADE"},
    434 	{mds_op_putfh, nullfree, DISP_OP_BOTH, "PUTFH"},
    435 	{mds_op_putpubfh, nullfree,  DISP_OP_MDS, "PUTPUBFH"},
    436 	{mds_op_putrootfh, nullfree,  DISP_OP_MDS, "PUTROOTFH"},
    437 	{mds_op_read, mds_op_read_free, DISP_OP_BOTH, "READ"},
    438 	{mds_op_readdir, mds_op_readdir_free,  DISP_OP_MDS, "READDIR"},
    439 	{mds_op_readlink, mds_op_readlink_free,  DISP_OP_MDS, "READLINK"},
    440 	{mds_op_remove, nullfree,  DISP_OP_MDS, "REMOVE"},
    441 	{mds_op_rename, nullfree,  DISP_OP_MDS, "RENAME"},
    442 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 30"},
    443 	{mds_op_restorefh, nullfree,  DISP_OP_MDS, "RESTOREFH"},
    444 	{mds_op_savefh, nullfree,  DISP_OP_MDS, "SAVEFH"},
    445 	{mds_op_secinfo, mds_op_secinfo_free,  DISP_OP_MDS, "SECINFO"},
    446 	{mds_op_setattr, nullfree,  DISP_OP_MDS, "SETATTR"},
    447 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 35"},
    448 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 36"},
    449 	{mds_op_verify, nullfree,  DISP_OP_MDS, "VERIFY"},
    450 	{mds_op_write, nullfree, DISP_OP_BOTH, "WRITE"},
    451 	{mds_op_notsup, nullfree,  DISP_OP_BAD, "BAD Op 39"},
    452 	{mds_op_backchannel_ctl, nullfree,  DISP_OP_BOTH, "BACKCHANNEL_CTL"},
    453 	{mds_op_bind_conn_to_session, nullfree,
    454 	    DISP_OP_BOTH, "BIND_CONN_TO_SESS"},
    455 	{mds_op_exchange_id, mds_op_exid_free,  DISP_OP_BOTH, "EXCHANGE_ID"},
    456 	{mds_op_create_session, nullfree,  DISP_OP_BOTH, "CREATE_SESS"},
    457 	{mds_op_destroy_session, nullfree,  DISP_OP_BOTH, "DESTROY_SESS"},
    458 	{mds_op_illegal, nullfree,  DISP_OP_MDS, "FREE_STATEID"},
    459 	{mds_op_illegal, nullfree,  DISP_OP_MDS, "GET_DIR_DELEG"},
    460 	{mds_op_get_devinfo, nullfree,  DISP_OP_MDS, "GET_DEVINFO"},
    461 	{mds_op_get_devlist, nullfree,  DISP_OP_MDS, "GET_DEVLIST"},
    462 	{mds_op_layout_commit, nullfree,  DISP_OP_MDS, "LAYOUT_COMMIT"},
    463 	{mds_op_layout_get, mds_op_layout_get_free,  DISP_OP_MDS, "LAYOUT_GET"},
    464 	{mds_op_layout_return, nullfree,  DISP_OP_MDS, "LAYOUT_RETURN"},
    465 	{mds_op_secinfonn, nullfree,
    466 	    DISP_OP_BOTH, "SECINFO_NONAME"},
    467 	{mds_op_sequence, nullfree,  DISP_OP_BOTH, "SEQUENCE"},
    468 	{mds_op_notsup, nullfree,  DISP_OP_BOTH, "SET_SSV"},
    469 	{mds_op_notsup, nullfree,  DISP_OP_MDS, "TEST_STATEID"},
    470 	{mds_op_notsup, nullfree,  DISP_OP_MDS, "WANT_DELEG"},
    471 	{mds_op_notsup, nullfree,  DISP_OP_BOTH, "DESTROY_CLIENTID"},
    472 	{mds_op_reclaim_complete, nullfree,  DISP_OP_MDS, "RECLAIM_COMPLETE"}
    473 };
    474 
    475 static uint_t mds_disp_cnt = sizeof (mds_disptab) / sizeof (mds_disptab[0]);
    476 
    477 #define	OP_ILLEGAL_IDX (mds_disp_cnt)
    478 
    479 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
    480 
    481 #ifdef	nextdp
    482 #undef nextdp
    483 #endif
    484 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
    485 
    486 /*ARGSUSED*/
    487 static void
    488 mds_op_readdir_free(nfs_resop4 *resop, compound_state_t *cs)
    489 {
    490 	/* Common function used for NFSv4.0 and NFSv4.1 */
    491 	rfs4_op_readdir_free(resop);
    492 }
    493 
    494 /*ARGSUSED*/
    495 static void
    496 mds_op_secinfo_free(nfs_resop4 *resop, compound_state_t *cs)
    497 {
    498 	/* Common function used for NFSv4.0 and NFSv4.1 */
    499 	rfs4_op_secinfo_free(resop);
    500 }
    501 
    502 /*
    503  */
    504 void
    505 mds_srvrfini(void)
    506 {
    507 	/* some shutdown stuff for the minor verson 1 server */
    508 }
    509 
    510 nfsstat4	rfs4_state_has_access(rfs4_state_t *, int, vnode_t *);
    511 int		rfs4_verify_attr(struct nfs4_svgetit_arg *, attrmap4 *,
    512 		    struct nfs4_ntov_table *);
    513 
    514 
    515 /*
    516  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
    517  * the file is being truncated, return NFS4_OK if allowed or approriate
    518  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
    519  * the associated file will be done if the I/O is not consistent with any
    520  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
    521  * as reader or writer as appropriate. rfs4_op_open will accquire the
    522  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
    523  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
    524  * deleg parameter, we will return whether a write delegation is held by
    525  * the client associated with this stateid.
    526  * If the server instance associated with the relevant client is in its
    527  * grace period, return NFS4ERR_GRACE.
    528  */
    529 nfsstat4
    530 mds_validate_stateid(int mode, struct compound_state *cs, vnode_t *vp,
    531     stateid4 *stateid, bool_t trunc, bool_t *deleg, bool_t do_access)
    532 {
    533 	rfs4_file_t *fp;
    534 	bool_t create = FALSE;
    535 	rfs4_state_t *sp;
    536 	rfs4_deleg_state_t *dsp;
    537 	rfs4_lo_state_t *lsp;
    538 	stateid_t *id = (stateid_t *)stateid;
    539 	nfsstat4 stat = NFS4_OK;
    540 
    541 	if (ISSPECIAL(stateid)) {
    542 		fp = rfs4_findfile(cs->instp, vp, NULL, &create);
    543 		if (fp == NULL)
    544 			return (NFS4_OK);
    545 		if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) {
    546 			rfs4_file_rele(fp);
    547 			return (NFS4_OK);
    548 		}
    549 		if (mode == FWRITE ||
    550 		    fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) {
    551 			rfs4_recall_deleg(fp, trunc, NULL);
    552 			rfs4_file_rele(fp);
    553 			return (NFS4ERR_DELAY);
    554 		}
    555 		rfs4_file_rele(fp);
    556 		return (NFS4_OK);
    557 	}
    558 
    559 	stat = rfs4_get_all_state(cs, stateid, &sp, &dsp, &lsp);
    560 	if (stat != NFS4_OK)
    561 		return (stat);
    562 
    563 	/*
    564 	 * Ordering of the following 'if' statements is specific
    565 	 * since rfs4_get_all_state() may return a value for sp and
    566 	 * lsp. First we check lsp, then 'fall' through to sp.
    567 	 */
    568 	if (lsp != NULL) {
    569 		/* Is associated server instance in its grace period? */
    570 		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
    571 			rfs4_lo_state_rele(lsp, FALSE);
    572 			if (sp != NULL)
    573 				rfs4_dbe_rele(sp->rs_dbe);
    574 			return (NFS4ERR_GRACE);
    575 		}
    576 
    577 		if (lsp->rls_lockid.v41_bits.chgseq != 0) {
    578 			/* Seqid in the future? - that's bad */
    579 			if (lsp->rls_lockid.v41_bits.chgseq <
    580 			    id->v41_bits.chgseq) {
    581 				rfs4_lo_state_rele(lsp, FALSE);
    582 				if (sp != NULL)
    583 					rfs4_dbe_rele(sp->rs_dbe);
    584 				return (NFS4ERR_BAD_STATEID);
    585 			}
    586 			/* Seqid in the past? - that's old */
    587 			if (lsp->rls_lockid.v41_bits.chgseq >
    588 			    id->v41_bits.chgseq) {
    589 				rfs4_lo_state_rele(lsp, FALSE);
    590 				if (sp != NULL)
    591 					rfs4_dbe_rele(sp->rs_dbe);
    592 				return (NFS4ERR_OLD_STATEID);
    593 			}
    594 		}
    595 
    596 		/* Ensure specified filehandle matches */
    597 		if (lsp->rls_state->rs_finfo->rf_vp != vp) {
    598 			rfs4_lo_state_rele(lsp, FALSE);
    599 			if (sp != NULL)
    600 				rfs4_dbe_rele(sp->rs_dbe);
    601 			return (NFS4ERR_BAD_STATEID);
    602 		}
    603 		rfs4_lo_state_rele(lsp, FALSE);
    604 	}
    605 
    606 	/*
    607 	 * Stateid provided was an "open" or via the lock stateid
    608 	 */
    609 	if (sp != NULL) {
    610 		/*
    611 		 * only check if the passed in stateid was an OPENID,
    612 		 * ie. Skip if we got here via the LOCKID.
    613 		 */
    614 		if (id->v41_bits.type == OPENID) {
    615 			/* Is associated server instance in its grace period? */
    616 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
    617 				rfs4_dbe_rele(sp->rs_dbe);
    618 				return (NFS4ERR_GRACE);
    619 			}
    620 
    621 			if (sp->rs_stateid.v41_bits.chgseq != 0) {
    622 				/* Seqid in the future? - that's bad */
    623 				if (sp->rs_stateid.v41_bits.chgseq <
    624 				    id->v41_bits.chgseq) {
    625 					rfs4_dbe_rele(sp->rs_dbe);
    626 					return (NFS4ERR_BAD_STATEID);
    627 				}
    628 				/* Seqid in the past - that's old */
    629 				if (sp->rs_stateid.v41_bits.chgseq >
    630 				    id->v41_bits.chgseq) {
    631 					rfs4_dbe_rele(sp->rs_dbe);
    632 					return (NFS4ERR_OLD_STATEID);
    633 				}
    634 			}
    635 
    636 			/* Ensure specified filehandle matches */
    637 			if (sp->rs_finfo->rf_vp != vp) {
    638 				rfs4_dbe_rele(sp->rs_dbe);
    639 				return (NFS4ERR_BAD_STATEID);
    640 			}
    641 		}
    642 		if (sp->rs_owner->ro_need_confirm) {
    643 			rfs4_dbe_rele(sp->rs_dbe);
    644 			return (NFS4ERR_BAD_STATEID);
    645 		}
    646 
    647 		if (sp->rs_closed == TRUE) {
    648 			rfs4_dbe_rele(sp->rs_dbe);
    649 			return (NFS4ERR_OLD_STATEID);
    650 		}
    651 
    652 		if (do_access)
    653 			stat = rfs4_state_has_access(sp, mode, vp);
    654 		else
    655 			stat = NFS4_OK;
    656 
    657 		/*
    658 		 * Return whether this state has write
    659 		 * delegation if desired
    660 		 */
    661 		if (deleg &&
    662 		    (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
    663 			*deleg = TRUE;
    664 
    665 		/*
    666 		 * We got a valid stateid, so we update the
    667 		 * lease on the client. Ideally we would like
    668 		 * to do this after the calling op succeeds,
    669 		 * but for now this will be good
    670 		 * enough. Callers of this routine are
    671 		 * currently insulated from the state stuff.
    672 		 */
    673 		rfs4_update_lease(sp->rs_owner->ro_client);
    674 
    675 		/*
    676 		 * If a delegation is present on this file and
    677 		 * this is a WRITE, then update the lastwrite
    678 		 * time to indicate that activity is present.
    679 		 */
    680 		if (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
    681 		    mode == FWRITE) {
    682 			sp->rs_finfo->rf_dinfo->rd_time_lastwrite =
    683 			    gethrestime_sec();
    684 		}
    685 
    686 		rfs4_dbe_rele(sp->rs_dbe);
    687 		return (stat);
    688 	}
    689 
    690 	if (dsp != NULL) {
    691 		/* Is associated server instance in its grace period? */
    692 		if (rfs4_clnt_in_grace(dsp->rds_client)) {
    693 			rfs4_deleg_state_rele(dsp);
    694 			return (NFS4ERR_GRACE);
    695 		}
    696 
    697 		if ((dsp->rds_delegid.v41_bits.chgseq != 0) &&
    698 		    (dsp->rds_delegid.v41_bits.chgseq != id->v41_bits.chgseq)) {
    699 			rfs4_deleg_state_rele(dsp);
    700 			return (NFS4ERR_BAD_STATEID);
    701 		}
    702 
    703 		/* Ensure specified filehandle matches */
    704 		if (dsp->rds_finfo->rf_vp != vp) {
    705 			rfs4_deleg_state_rele(dsp);
    706 			return (NFS4ERR_BAD_STATEID);
    707 		}
    708 		/*
    709 		 * Return whether this state has write
    710 		 * delegation if desired
    711 		 */
    712 		if (deleg &&
    713 		    (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
    714 			*deleg = TRUE;
    715 
    716 		rfs4_update_lease(dsp->rds_client);
    717 
    718 		/*
    719 		 * If a delegation is present on this file and
    720 		 * this is a WRITE, then update the lastwrite
    721 		 * time to indicate that activity is present.
    722 		 */
    723 		if (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
    724 		    mode == FWRITE) {
    725 			dsp->rds_finfo->rf_dinfo->rd_time_lastwrite =
    726 			    gethrestime_sec();
    727 		}
    728 
    729 		/*
    730 		 * XXX - what happens if this is a WRITE and the
    731 		 * delegation type of for READ.
    732 		 */
    733 		rfs4_deleg_state_rele(dsp);
    734 
    735 		return (stat);
    736 	}
    737 	/*
    738 	 * If we got this far, something bad happened
    739 	 */
    740 	return (NFS4ERR_BAD_STATEID);
    741 }
    742 
    743 nfsstat4
    744 mds_setattr(attrmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    745     stateid4 *stateid)
    746 {
    747 	int error = 0;
    748 	struct nfs4_svgetit_arg sarg;
    749 	bool_t trunc;
    750 
    751 	nfsstat4 status = NFS4_OK;
    752 	cred_t *cr = cs->cr;
    753 	vnode_t *vp = cs->vp;
    754 	struct nfs4_ntov_table ntov;
    755 	struct statvfs64 sb;
    756 	struct vattr bva;
    757 	struct flock64 bf;
    758 	int in_crit = 0;
    759 	uint_t saved_mask = 0;
    760 	caller_context_t ct;
    761 	attrvers_t avers;
    762 	struct nfs4_ntov_map *nvmap;
    763 
    764 	avers = RFS4_ATTRVERS(cs);
    765 	nvmap = NFS4_NTOV_MAP(avers);
    766 	*resp = NFS4_EMPTY_ATTRMAP(avers);
    767 	sarg.sbp = &sb;
    768 	nfs4_ntov_table_init(&ntov, avers);
    769 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
    770 	    NFS4ATTR_SETIT);
    771 	if (status != NFS4_OK) {
    772 		/*
    773 		 * failed set attrs
    774 		 */
    775 		goto done;
    776 	}
    777 
    778 	if (sarg.vap->va_mask == 0 && ! ATTR_ISSET(fattrp->attrmask, ACL) &&
    779 	    ! ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
    780 		/*
    781 		 * no further work to be done
    782 		 */
    783 		goto done;
    784 	}
    785 
    786 	ct.cc_sysid = 0;
    787 	ct.cc_pid = 0;
    788 	ct.cc_caller_id = cs->instp->caller_id;
    789 	ct.cc_flags = CC_DONTBLOCK;
    790 
    791 	/*
    792 	 * If we got a request to set the ACL and the MODE, only
    793 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
    794 	 * to change any other bits, along with setting an ACL,
    795 	 * gives NFS4ERR_INVAL.
    796 	 */
    797 	if (ATTR_ISSET(fattrp->attrmask, ACL) &&
    798 	    ATTR_ISSET(fattrp->attrmask, MODE)) {
    799 		vattr_t va;
    800 
    801 		va.va_mask = AT_MODE;
    802 		error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
    803 		if (error) {
    804 			status = puterrno4(error);
    805 			goto done;
    806 		}
    807 		if ((sarg.vap->va_mode ^ va.va_mode) &
    808 		    ~(VSUID | VSGID | VSVTX)) {
    809 			status = NFS4ERR_INVAL;
    810 			goto done;
    811 		}
    812 	}
    813 
    814 	/* Check stateid only if size has been set */
    815 	if (sarg.vap->va_mask & AT_SIZE) {
    816 		trunc = (sarg.vap->va_size == 0);
    817 		status = mds_validate_stateid(FWRITE,
    818 		    cs, cs->vp, stateid, trunc,
    819 		    &cs->deleg, sarg.vap->va_mask & AT_SIZE);
    820 		if (status != NFS4_OK)
    821 			goto done;
    822 	}
    823 
    824 	/* XXX start of possible race with delegations */
    825 
    826 	/*
    827 	 * We need to specially handle size changes because it is
    828 	 * possible for the client to create a file with read-only
    829 	 * modes, but with the file opened for writing. If the client
    830 	 * then tries to set the file size, e.g. ftruncate(3C),
    831 	 * fcntl(F_FREESP), the normal access checking done in
    832 	 * VOP_SETATTR would prevent the client from doing it even though
    833 	 * it should be allowed to do so.  To get around this, we do the
    834 	 * access checking for ourselves and use VOP_SPACE which doesn't
    835 	 * do the access checking.
    836 	 * Also the client should not be allowed to change the file
    837 	 * size if there is a conflicting non-blocking mandatory lock in
    838 	 * the region of the change.
    839 	 */
    840 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
    841 		u_offset_t offset;
    842 		ssize_t length;
    843 
    844 		/*
    845 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
    846 		 * before returning, sarg.vap->va_mask is used to
    847 		 * generate the setattr reply bitmap.  We also clear
    848 		 * AT_SIZE below before calling VOP_SPACE.  For both
    849 		 * of these cases, the va_mask needs to be saved here
    850 		 * and restored after calling VOP_SETATTR.
    851 		 */
    852 		saved_mask = sarg.vap->va_mask;
    853 
    854 		/*
    855 		 * Check any possible conflict due to NBMAND locks.
    856 		 * Get into critical region before VOP_GETATTR, so the
    857 		 * size attribute is valid when checking conflicts.
    858 		 */
    859 		if (nbl_need_check(vp)) {
    860 			nbl_start_crit(vp, RW_READER);
    861 			in_crit = 1;
    862 		}
    863 
    864 		bva.va_mask = AT_UID|AT_SIZE;
    865 		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
    866 			status = puterrno4(error);
    867 			goto done;
    868 		}
    869 
    870 		if (in_crit) {
    871 			if (sarg.vap->va_size < bva.va_size) {
    872 				offset = sarg.vap->va_size;
    873 				length = bva.va_size - sarg.vap->va_size;
    874 			} else {
    875 				offset = bva.va_size;
    876 				length = sarg.vap->va_size - bva.va_size;
    877 			}
    878 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
    879 			    &ct)) {
    880 				status = NFS4ERR_LOCKED;
    881 				goto done;
    882 			}
    883 		}
    884 
    885 		if (crgetuid(cr) == bva.va_uid) {
    886 			sarg.vap->va_mask &= ~AT_SIZE;
    887 			bf.l_type = F_WRLCK;
    888 			bf.l_whence = 0;
    889 			bf.l_start = (off64_t)sarg.vap->va_size;
    890 			bf.l_len = 0;
    891 			bf.l_sysid = 0;
    892 			bf.l_pid = 0;
    893 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
    894 			    (offset_t)sarg.vap->va_size, cr, &ct);
    895 		}
    896 	}
    897 
    898 	if (!error && sarg.vap->va_mask != 0)
    899 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
    900 
    901 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
    902 	if (saved_mask & AT_SIZE)
    903 		sarg.vap->va_mask |= AT_SIZE;
    904 
    905 	/*
    906 	 * If an ACL was being set, it has been delayed until now,
    907 	 * in order to set the mode (via the VOP_SETATTR() above) first.
    908 	 */
    909 	if (! error && ATTR_ISSET(fattrp->attrmask, ACL)) {
    910 		int i;
    911 
    912 		for (i = 0; i < ntov.attrcnt; i++)
    913 			if (ntov.amap[i] == FATTR4_ACL)
    914 				break;
    915 		if (i < ntov.attrcnt) {
    916 			error = (*nvmap[FATTR4_ACL].sv_getit)(NFS4ATTR_SETIT,
    917 			    &sarg, &ntov.na[i]);
    918 			if (error == 0) {
    919 				ATTR_SET(*resp, ACL);
    920 			} else if (error == ENOTSUP) {
    921 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
    922 				status = NFS4ERR_ATTRNOTSUPP;
    923 				goto done;
    924 			}
    925 		} else {
    926 			error = EINVAL;
    927 		}
    928 	}
    929 
    930 	if (! error && ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
    931 		/*
    932 		 * Store layout hint.  Layout hint will be stored
    933 		 * in file struct (which means it can only be set
    934 		 * when the file is open).  If layout hint is allowed
    935 		 * for files not open, then it must be stored
    936 		 * persistently.
    937 		 *
    938 		 * status assignment placates lint.  it will
    939 		 * be replaced with code to store the layout
    940 		 * hint.
    941 		 */
    942 		status = NFS4_OK;
    943 	}
    944 
    945 	if (error) {
    946 		/* check if a monitor detected a delegation conflict */
    947 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
    948 			status = NFS4ERR_DELAY;
    949 		else
    950 			status = puterrno4(error);
    951 
    952 		/*
    953 		 * Set the response bitmap when setattr failed.
    954 		 * If VOP_SETATTR partially succeeded, test by doing a
    955 		 * VOP_GETATTR on the object and comparing the data
    956 		 * to the setattr arguments.
    957 		 */
    958 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
    959 	} else {
    960 		/*
    961 		 * Force modified metadata out to stable storage.
    962 		 */
    963 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
    964 		/*
    965 		 * Set response bitmap
    966 		 */
    967 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
    968 	}
    969 
    970 	/* Return early and already have a NFSv4 error */
    971 done:
    972 	/*
    973 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
    974 	 * conversion sets both readable and writeable NFS4 attrs
    975 	 * for AT_MTIME and AT_ATIME.  The line below masks out
    976 	 * unrequested attrs from the setattr result bitmap.  This
    977 	 * is placed after the done: label to catch the ATTRNOTSUP
    978 	 * case.
    979 	 */
    980 	ATTRMAP_MASK(*resp, fattrp->attrmask);
    981 
    982 	if (in_crit)
    983 		nbl_end_crit(vp);
    984 
    985 	nfs4_ntov_table_free(&ntov, &sarg);
    986 
    987 	return (status);
    988 }
    989 
    990 /* ARGSUSED */
    991 void
    992 mds_op_secinfonn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    993     compound_state_t *cs)
    994 {
    995 	SECINFO_NO_NAME4res *respnn;
    996 	int dotdot;
    997 
    998 	DTRACE_NFSV4_1(op__secinfo__no__name__start,
    999 	    struct compound_state *, cs);
   1000 
   1001 	respnn = &resop->nfs_resop4_u.opsecinfo_no_name;
   1002 
   1003 	/*
   1004 	 * Current file handle (cfh) should have been set before
   1005 	 * getting into this function. If not, return error.
   1006 	 */
   1007 	if (cs->vp == NULL) {
   1008 		*cs->statusp = respnn->status = NFS4ERR_NOFILEHANDLE;
   1009 		goto final;
   1010 	}
   1011 
   1012 	dotdot =
   1013 	    (argop->nfs_argop4_u.opsecinfo_no_name == SECINFO_STYLE4_PARENT);
   1014 
   1015 	*cs->statusp = respnn->status = do_rfs4_op_secinfo(cs, NULL,
   1016 	    dotdot, (SECINFO4res *)respnn);
   1017 
   1018 final:
   1019 	DTRACE_NFSV4_2(op__secinfo__no__name__done,
   1020 	    struct compound_state *, cs,
   1021 	    SECINFO_NO_NAME4res *, respnn);
   1022 }
   1023 
   1024 /* ARGSUSED */
   1025 void
   1026 mds_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1027     compound_state_t *cs)
   1028 {
   1029 	SECINFO4res *resp;
   1030 	utf8string *utfnm;
   1031 	uint_t len, dotdot;
   1032 	char *nm;
   1033 
   1034 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
   1035 
   1036 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
   1037 	    SECINFO4args *, args);
   1038 
   1039 	resp = &resop->nfs_resop4_u.opsecinfo;
   1040 
   1041 	/*
   1042 	 * Current file handle (cfh) should have been set before
   1043 	 * getting into this function. If not, return error.
   1044 	 */
   1045 	if (cs->vp == NULL) {
   1046 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1047 		goto final;
   1048 	}
   1049 	if (cs->vp->v_type != VDIR) {
   1050 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1051 		goto final;
   1052 	}
   1053 
   1054 	/*
   1055 	 * Verify the component name. If failed, error out, but
   1056 	 * do not error out if the component name is a "..".
   1057 	 * SECINFO will return its parents secinfo data for SECINFO "..".
   1058 	 */
   1059 	utfnm = &argop->nfs_argop4_u.opsecinfo.name;
   1060 	if (!utf8_dir_verify(utfnm)) {
   1061 		if (utfnm->utf8string_len != 2 ||
   1062 		    utfnm->utf8string_val[0] != '.' ||
   1063 		    utfnm->utf8string_val[1] != '.') {
   1064 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1065 			goto final;
   1066 		}
   1067 		dotdot = 1;
   1068 	} else
   1069 		dotdot = 0;
   1070 
   1071 	nm = utf8_to_str(utfnm, &len, NULL);
   1072 	if (nm == NULL) {
   1073 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1074 		goto final;
   1075 	}
   1076 
   1077 	if (len > MAXNAMELEN) {
   1078 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1079 		kmem_free(nm, len);
   1080 		goto final;
   1081 	}
   1082 
   1083 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, dotdot, resp);
   1084 
   1085 	kmem_free(nm, len);
   1086 
   1087 final:
   1088 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
   1089 	    SECINFO4res *, resp);
   1090 }
   1091 
/*
 * verify and nverify are exactly the same, except that nverify
 * succeeds when some argument changed, and verify succeeds when
 * none changed.
 */
   1097 
   1098 /* ARGSUSED */
   1099 void
   1100 mds_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1101     compound_state_t *cs)
   1102 {
   1103 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
   1104 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
   1105 	int error;
   1106 	struct nfs4_svgetit_arg sarg;
   1107 	struct statvfs64 sb;
   1108 	struct nfs4_ntov_table ntov;
   1109 
   1110 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
   1111 	    VERIFY4args *, args);
   1112 
   1113 	if (cs->vp == NULL) {
   1114 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1115 		goto final;
   1116 	}
   1117 
   1118 	sarg.sbp = &sb;
   1119 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1120 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   1121 	    &sarg, &ntov, NFS4ATTR_VERIT);
   1122 	if (resp->status != NFS4_OK) {
   1123 		/*
   1124 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   1125 		 * so could return -1 for "no match".
   1126 		 */
   1127 		if (resp->status == -1)
   1128 			resp->status = NFS4ERR_NOT_SAME;
   1129 		goto done;
   1130 	}
   1131 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   1132 	switch (error) {
   1133 	case 0:
   1134 		resp->status = NFS4_OK;
   1135 		break;
   1136 	case -1:
   1137 		resp->status = NFS4ERR_NOT_SAME;
   1138 		break;
   1139 	default:
   1140 		resp->status = puterrno4(error);
   1141 		break;
   1142 	}
   1143 done:
   1144 	*cs->statusp = resp->status;
   1145 	nfs4_ntov_table_free(&ntov, &sarg);
   1146 
   1147 final:
   1148 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
   1149 	    VERIFY4res *, resp);
   1150 }
   1151 
   1152 /* ARGSUSED */
   1153 void
   1154 mds_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1155     compound_state_t *cs)
   1156 {
   1157 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
   1158 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
   1159 	int error;
   1160 	struct nfs4_svgetit_arg sarg;
   1161 	struct statvfs64 sb;
   1162 	struct nfs4_ntov_table ntov;
   1163 
   1164 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
   1165 	    NVERIFY4args *, args);
   1166 
   1167 	if (cs->vp == NULL) {
   1168 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1169 		goto final;
   1170 	}
   1171 	sarg.sbp = &sb;
   1172 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1173 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   1174 	    &sarg, &ntov, NFS4ATTR_VERIT);
   1175 	if (resp->status != NFS4_OK) {
   1176 		/*
   1177 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   1178 		 * so could return -1 for "no match".
   1179 		 */
   1180 		if (resp->status == -1)
   1181 			resp->status = NFS4_OK;
   1182 		goto done;
   1183 	}
   1184 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   1185 	switch (error) {
   1186 	case 0:
   1187 		resp->status = NFS4ERR_SAME;
   1188 		break;
   1189 	case -1:
   1190 		resp->status = NFS4_OK;
   1191 		break;
   1192 	default:
   1193 		resp->status = puterrno4(error);
   1194 		break;
   1195 	}
   1196 done:
   1197 	*cs->statusp = resp->status;
   1198 	nfs4_ntov_table_free(&ntov, &sarg);
   1199 
   1200 final:
   1201 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   1202 	    NVERIFY4res *, resp);
   1203 
   1204 }
   1205 
   1206 /* ARGSUSED */
   1207 void
   1208 mds_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1209     compound_state_t *cs)
   1210 {
   1211 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
   1212 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
   1213 	int error;
   1214 	vnode_t *vp;
   1215 	struct vattr va;
   1216 	int checkwriteperm;
   1217 	cred_t *cr = cs->cr;
   1218 	bslabel_t *clabel, *slabel;
   1219 	ts_label_t *tslabel;
   1220 	boolean_t admin_low_client;
   1221 
   1222 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
   1223 	    ACCESS4args *, args);
   1224 
   1225 	if (cs->vp == NULL) {
   1226 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1227 		goto final;
   1228 	}
   1229 
   1230 	ASSERT(cr != NULL);
   1231 
   1232 	vp = cs->vp;
   1233 
   1234 	/*
   1235 	 * If the file system is exported read only, it is not appropriate
   1236 	 * to check write permissions for regular files and directories.
   1237 	 * Special files are interpreted by the client, so the underlying
   1238 	 * permissions are sent back to the client for interpretation.
   1239 	 */
   1240 	if (rdonly4(cs->exi, cs->vp, req) &&
   1241 	    (vp->v_type == VREG || vp->v_type == VDIR))
   1242 		checkwriteperm = 0;
   1243 	else
   1244 		checkwriteperm = 1;
   1245 
   1246 	/*
   1247 	 * XXX
   1248 	 * We need the mode so that we can correctly determine access
   1249 	 * permissions relative to a mandatory lock file.  Access to
   1250 	 * mandatory lock files is denied on the server, so it might
   1251 	 * as well be reflected to the server during the open.
   1252 	 */
   1253 	va.va_mask = AT_MODE;
   1254 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
   1255 	if (error) {
   1256 		*cs->statusp = resp->status = puterrno4(error);
   1257 		goto final;
   1258 	}
   1259 	resp->access = 0;
   1260 	resp->supported = 0;
   1261 
   1262 	if (is_system_labeled()) {
   1263 		ASSERT(req->rq_label != NULL);
   1264 		clabel = req->rq_label;
   1265 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
   1266 		    "got client label from request(1)",
   1267 		    struct svc_req *, req);
   1268 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   1269 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
   1270 				*cs->statusp = resp->status = puterrno4(EACCES);
   1271 				goto final;
   1272 			}
   1273 			slabel = label2bslabel(tslabel);
   1274 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
   1275 			    char *, "got server label(1) for vp(2)",
   1276 			    bslabel_t *, slabel, vnode_t *, vp);
   1277 
   1278 			admin_low_client = B_FALSE;
   1279 		} else
   1280 			admin_low_client = B_TRUE;
   1281 	}
   1282 
   1283 	if (args->access & ACCESS4_READ) {
   1284 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
   1285 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1286 		    (!is_system_labeled() || admin_low_client ||
   1287 		    bldominates(clabel, slabel)))
   1288 			resp->access |= ACCESS4_READ;
   1289 		resp->supported |= ACCESS4_READ;
   1290 	}
   1291 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
   1292 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1293 		if (!error && (!is_system_labeled() || admin_low_client ||
   1294 		    bldominates(clabel, slabel)))
   1295 			resp->access |= ACCESS4_LOOKUP;
   1296 		resp->supported |= ACCESS4_LOOKUP;
   1297 	}
   1298 	if (checkwriteperm &&
   1299 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
   1300 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1301 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1302 		    (!is_system_labeled() || admin_low_client ||
   1303 		    blequal(clabel, slabel)))
   1304 			resp->access |=
   1305 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
   1306 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
   1307 	}
   1308 
   1309 	if (checkwriteperm &&
   1310 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
   1311 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1312 		if (!error && (!is_system_labeled() || admin_low_client ||
   1313 		    blequal(clabel, slabel)))
   1314 			resp->access |= ACCESS4_DELETE;
   1315 		resp->supported |= ACCESS4_DELETE;
   1316 	}
   1317 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
   1318 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1319 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1320 		    (!is_system_labeled() || admin_low_client ||
   1321 		    bldominates(clabel, slabel)))
   1322 			resp->access |= ACCESS4_EXECUTE;
   1323 		resp->supported |= ACCESS4_EXECUTE;
   1324 	}
   1325 
   1326 	if (is_system_labeled() && !admin_low_client)
   1327 		label_rele(tslabel);
   1328 
   1329 	*cs->statusp = resp->status = NFS4_OK;
   1330 
   1331 final:
   1332 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
   1333 	    ACCESS4res *, resp);
   1334 }
   1335 
   1336 /* ARGSUSED */
   1337 static void
   1338 mds_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1339 	compound_state_t *cs)
   1340 {
   1341 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
   1342 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
   1343 	int error;
   1344 	vnode_t *vp = cs->vp;
   1345 	cred_t *cr = cs->cr;
   1346 	vattr_t va;
   1347 	caller_context_t ct;
   1348 
   1349 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
   1350 	    COMMIT4args *, args);
   1351 
   1352 	if (vp == NULL) {
   1353 		/*
   1354 		 * XXX kludge: fake the commit if we are a data server
   1355 		 * This will be replaced once we have nnop_commit().
   1356 		 */
   1357 		if (cs->nn != NULL) {
   1358 			*cs->statusp = resp->status = NFS4_OK;
   1359 			resp->writeverf = cs->instp->Write4verf;
   1360 		} else {
   1361 			*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1362 		}
   1363 		goto final;
   1364 	}
   1365 	if (cs->access == CS_ACCESS_DENIED) {
   1366 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1367 		goto final;
   1368 	}
   1369 
   1370 	if (args->offset + args->count < args->offset) {
   1371 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1372 		goto final;
   1373 	}
   1374 
   1375 	ct.cc_sysid = 0;
   1376 	ct.cc_pid = 0;
   1377 	ct.cc_caller_id = cs->instp->caller_id;
   1378 	ct.cc_flags = CC_DONTBLOCK;
   1379 
   1380 	va.va_mask = AT_UID;
   1381 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
   1382 
   1383 	/*
   1384 	 * If we can't get the attributes, then we can't do the
   1385 	 * right access checking.  So, we'll fail the request.
   1386 	 */
   1387 	if (error) {
   1388 		*cs->statusp = resp->status = puterrno4(error);
   1389 		goto final;
   1390 	}
   1391 	if (rdonly4(cs->exi, cs->vp, req)) {
   1392 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1393 		goto final;
   1394 	}
   1395 
   1396 	if (vp->v_type != VREG) {
   1397 		if (vp->v_type == VDIR)
   1398 			resp->status = NFS4ERR_ISDIR;
   1399 		else
   1400 			resp->status = NFS4ERR_INVAL;
   1401 		*cs->statusp = resp->status;
   1402 		goto final;
   1403 	}
   1404 
   1405 	if (crgetuid(cr) != va.va_uid &&
   1406 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, &ct))) {
   1407 		*cs->statusp = resp->status = puterrno4(error);
   1408 		goto final;
   1409 	}
   1410 
   1411 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, &ct);
   1412 	if (!error)
   1413 		error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
   1414 
   1415 	if (error) {
   1416 		*cs->statusp = resp->status = puterrno4(error);
   1417 		goto final;
   1418 	}
   1419 
   1420 	*cs->statusp = resp->status = NFS4_OK;
   1421 	resp->writeverf = cs->instp->Write4verf;
   1422 
   1423 final:
   1424 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
   1425 	    COMMIT4res *, resp);
   1426 }
   1427 
   1428 /*
   1429  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
   1430  * was completed. It does the nfsv4 create for special files.
   1431  *
   1432  * nfsv4 create is used to create non-regular files. For regular files,
   1433  * use nfsv4 open.
   1434  */
   1435 /* ARGSUSED */
   1436 static void
   1437 mds_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1438 	compound_state_t *cs)
   1439 {
   1440 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
   1441 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
   1442 	int error;
   1443 	struct vattr bva, iva, iva2, ava, *vap;
   1444 	cred_t *cr = cs->cr;
   1445 	vnode_t *dvp = cs->vp;
   1446 	vnode_t *vp = NULL;
   1447 	vnode_t *realvp;
   1448 	char *nm, *lnm;
   1449 	uint_t len, llen;
   1450 	int syncval = 0;
   1451 	struct nfs4_svgetit_arg sarg;
   1452 	struct nfs4_ntov_table ntov;
   1453 	struct statvfs64 sb;
   1454 	nfsstat4 status;
   1455 	caller_context_t ct;
   1456 
   1457 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
   1458 	    CREATE4args *, args);
   1459 
   1460 	resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1461 
   1462 	if (dvp == NULL) {
   1463 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1464 		goto final;
   1465 	}
   1466 
   1467 	/*
   1468 	 * If there is an unshared filesystem mounted on this vnode,
   1469 	 * do not allow to create an object in this directory.
   1470 	 */
   1471 	if (vn_ismntpt(dvp)) {
   1472 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1473 		goto final;
   1474 	}
   1475 
   1476 	ct.cc_sysid = 0;
   1477 	ct.cc_pid = 0;
   1478 	ct.cc_caller_id = cs->instp->caller_id;
   1479 	ct.cc_flags = CC_DONTBLOCK;
   1480 
   1481 	/* Verify that type is correct */
   1482 	switch (args->type) {
   1483 	case NF4LNK:
   1484 	case NF4BLK:
   1485 	case NF4CHR:
   1486 	case NF4SOCK:
   1487 	case NF4FIFO:
   1488 	case NF4DIR:
   1489 		break;
   1490 	default:
   1491 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1492 		goto final;
   1493 	};
   1494 
   1495 	if (cs->access == CS_ACCESS_DENIED) {
   1496 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1497 		goto final;
   1498 	}
   1499 	if (dvp->v_type != VDIR) {
   1500 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1501 		goto final;
   1502 	}
   1503 	if (!utf8_dir_verify(&args->objname)) {
   1504 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1505 		goto final;
   1506 	}
   1507 
   1508 	if (rdonly4(cs->exi, cs->vp, req)) {
   1509 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1510 		goto final;
   1511 	}
   1512 
   1513 	/*
   1514 	 * Name of newly created object
   1515 	 */
   1516 	nm = utf8_to_fn(&args->objname, &len, NULL);
   1517 	if (nm == NULL) {
   1518 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1519 		goto final;
   1520 	}
   1521 
   1522 	if (len > MAXNAMELEN) {
   1523 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1524 		kmem_free(nm, len);
   1525 		goto final;
   1526 	}
   1527 
   1528 	sarg.sbp = &sb;
   1529 	nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs));
   1530 
   1531 	status = do_rfs4_set_attrs(&resp->attrset,
   1532 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
   1533 
   1534 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
   1535 		status = NFS4ERR_INVAL;
   1536 
   1537 	if (status != NFS4_OK) {
   1538 		*cs->statusp = resp->status = status;
   1539 		kmem_free(nm, len);
   1540 		nfs4_ntov_table_free(&ntov, &sarg);
   1541 
   1542 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1543 		goto final;
   1544 	}
   1545 
   1546 	/* Get "before" change value */
   1547 	bva.va_mask = AT_CTIME|AT_SEQ;
   1548 	error = VOP_GETATTR(dvp, &bva, 0, cr, &ct);
   1549 	if (error) {
   1550 		*cs->statusp = resp->status = puterrno4(error);
   1551 		kmem_free(nm, len);
   1552 		nfs4_ntov_table_free(&ntov, &sarg);
   1553 
   1554 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1555 		goto final;
   1556 	}
   1557 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
   1558 
   1559 	vap = sarg.vap;
   1560 
   1561 	/*
   1562 	 * Set default initial values for attributes when not specified
   1563 	 * in createattrs.
   1564 	 */
   1565 	if ((vap->va_mask & AT_UID) == 0) {
   1566 		vap->va_uid = crgetuid(cr);
   1567 		vap->va_mask |= AT_UID;
   1568 	}
   1569 	if ((vap->va_mask & AT_GID) == 0) {
   1570 		vap->va_gid = crgetgid(cr);
   1571 		vap->va_mask |= AT_GID;
   1572 	}
   1573 
   1574 	vap->va_mask |= AT_TYPE;
   1575 	switch (args->type) {
   1576 	case NF4DIR:
   1577 		vap->va_type = VDIR;
   1578 		if ((vap->va_mask & AT_MODE) == 0) {
   1579 			vap->va_mode = 0700;	/* default: owner rwx only */
   1580 			vap->va_mask |= AT_MODE;
   1581 		}
   1582 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr, &ct, 0, NULL);
   1583 		if (error)
   1584 			break;
   1585 
   1586 		/*
   1587 		 * Get the initial "after" sequence number, if it fails,
   1588 		 * set to zero
   1589 		 */
   1590 		iva.va_mask = AT_SEQ;
   1591 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1592 			iva.va_seq = 0;
   1593 		break;
   1594 	case NF4LNK:
   1595 		vap->va_type = VLNK;
   1596 		if ((vap->va_mask & AT_MODE) == 0) {
   1597 			vap->va_mode = 0700;	/* default: owner rwx only */
   1598 			vap->va_mask |= AT_MODE;
   1599 		}
   1600 
   1601 		/*
   1602 		 * symlink names must be treated as data
   1603 		 */
   1604 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
   1605 
   1606 		if (lnm == NULL) {
   1607 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1608 			kmem_free(nm, len);
   1609 			nfs4_ntov_table_free(&ntov, &sarg);
   1610 			resp->attrset =
   1611 			    NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1612 			goto final;
   1613 		}
   1614 
   1615 		if (llen > MAXPATHLEN) {
   1616 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1617 			kmem_free(nm, len);
   1618 			kmem_free(lnm, llen);
   1619 			nfs4_ntov_table_free(&ntov, &sarg);
   1620 			resp->attrset =
   1621 			    NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1622 			goto final;
   1623 		}
   1624 
   1625 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, &ct, 0);
   1626 		if (lnm != NULL)
   1627 			kmem_free(lnm, llen);
   1628 		if (error)
   1629 			break;
   1630 
   1631 		/*
   1632 		 * Get the initial "after" sequence number, if it fails,
   1633 		 * set to zero
   1634 		 */
   1635 		iva.va_mask = AT_SEQ;
   1636 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1637 			iva.va_seq = 0;
   1638 
   1639 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr,
   1640 		    &ct, 0, NULL);
   1641 		if (error)
   1642 			break;
   1643 
   1644 		/*
   1645 		 * va_seq is not safe over VOP calls, check it again
   1646 		 * if it has changed zero out iva to force atomic = FALSE.
   1647 		 */
   1648 		iva2.va_mask = AT_SEQ;
   1649 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, &ct) ||
   1650 		    iva2.va_seq != iva.va_seq)
   1651 			iva.va_seq = 0;
   1652 		break;
   1653 	default:
   1654 		/*
   1655 		 * probably a special file.
   1656 		 */
   1657 		if ((vap->va_mask & AT_MODE) == 0) {
   1658 			vap->va_mode = 0600;	/* default: owner rw only */
   1659 			vap->va_mask |= AT_MODE;
   1660 		}
   1661 		syncval = FNODSYNC;
   1662 		/*
   1663 		 * We know this will only generate one VOP call
   1664 		 */
   1665 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
   1666 
   1667 		if (vp == NULL) {
   1668 			kmem_free(nm, len);
   1669 			nfs4_ntov_table_free(&ntov, &sarg);
   1670 			resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1671 			goto final;
   1672 		}
   1673 
   1674 		/*
   1675 		 * Get the initial "after" sequence number, if it fails,
   1676 		 * set to zero
   1677 		 */
   1678 		iva.va_mask = AT_SEQ;
   1679 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct))
   1680 			iva.va_seq = 0;
   1681 
   1682 		break;
   1683 	}
   1684 	kmem_free(nm, len);
   1685 
   1686 	if (error) {
   1687 		*cs->statusp = resp->status = puterrno4(error);
   1688 	}
   1689 
   1690 	/*
   1691 	 * Force modified data and metadata out to stable storage.
   1692 	 */
   1693 	(void) VOP_FSYNC(dvp, 0, cr, &ct);
   1694 
   1695 	if (resp->status != NFS4_OK) {
   1696 		if (vp != NULL)
   1697 			VN_RELE(vp);
   1698 		nfs4_ntov_table_free(&ntov, &sarg);
   1699 		resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs));
   1700 		goto final;
   1701 	}
   1702 
   1703 	/*
   1704 	 * Finish setup of cinfo response, "before" value already set.
   1705 	 * Get "after" change value, if it fails, simply return the
   1706 	 * before value.
   1707 	 */
   1708 	ava.va_mask = AT_CTIME|AT_SEQ;
   1709 	if (VOP_GETATTR(dvp, &ava, 0, cr, &ct)) {
   1710 		ava.va_ctime = bva.va_ctime;
   1711 		ava.va_seq = 0;
   1712 	}
   1713 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
   1714 
   1715 	/*
   1716 	 * True verification that object was created with correct
   1717 	 * attrs is impossible.  The attrs could have been changed
   1718 	 * immediately after object creation.  If attributes did
   1719 	 * not verify, the only recourse for the server is to
   1720 	 * destroy the object.  Maybe if some attrs (like gid)
   1721 	 * are set incorrectly, the object should be destroyed;
   1722 	 * however, seems bad as a default policy.  Do we really
   1723 	 * want to destroy an object over one of the times not
   1724 	 * verifying correctly?  For these reasons, the server
   1725 	 * currently sets bits in attrset for createattrs
   1726 	 * that were set; however, no verification is done.
   1727 	 *
   1728 	 * vmask_to_nmask accounts for vattr bits set on create
   1729 	 *	[do_rfs4_set_attrs() only sets resp bits for
   1730 	 *	 non-vattr/vfs bits.]
   1731 	 * Mask off any bits set by default so as not to return
   1732 	 * more attrset bits than were requested in createattrs
   1733 	 */
   1734 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset,
   1735 	    RFS4_ATTRVERS(cs));
   1736 	ATTRMAP_MASK(resp->attrset, args->createattrs.attrmask);
   1737 	nfs4_ntov_table_free(&ntov, &sarg);
   1738 
   1739 	error = mknfs41_fh(&cs->fh, vp, cs->exi);
   1740 	if (error) {
   1741 		*cs->statusp = resp->status = puterrno4(error);
   1742 	}
   1743 
   1744 	/*
   1745 	 * The cinfo.atomic = TRUE only if we got no errors, we have
   1746 	 * non-zero va_seq's, and it has incremented by exactly one
   1747 	 * during the creation and it didn't change during the VOP_LOOKUP
   1748 	 * or VOP_FSYNC.
   1749 	 */
   1750 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
   1751 	    iva.va_seq == (bva.va_seq + 1) &&
   1752 	    iva.va_seq == ava.va_seq)
   1753 		resp->cinfo.atomic = TRUE;
   1754 	else
   1755 		resp->cinfo.atomic = FALSE;
   1756 
   1757 	/*
   1758 	 * Force modified metadata out to stable storage.
   1759 	 *
   1760 	 * if a underlying vp exists, pass it to VOP_FSYNC
   1761 	 */
   1762 	if (VOP_REALVP(vp, &realvp, &ct) == 0)
   1763 		(void) VOP_FSYNC(realvp, syncval, cr, &ct);
   1764 	else
   1765 		(void) VOP_FSYNC(vp, syncval, cr, &ct);
   1766 
   1767 	if (resp->status != NFS4_OK) {
   1768 		VN_RELE(vp);
   1769 		goto final;
   1770 	}
   1771 	if (cs->vp)
   1772 		VN_RELE(cs->vp);
   1773 
   1774 	cs->vp = vp;
   1775 	*cs->statusp = resp->status = NFS4_OK;
   1776 
   1777 final:
   1778 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
   1779 	    CREATE4res *, resp);
   1780 }
   1781 
   1782 
   1783 /*ARGSUSED*/
   1784 static void
   1785 mds_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1786 	compound_state_t *cs)
   1787 {
   1788 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
   1789 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
   1790 	rfs4_deleg_state_t *dsp;
   1791 	nfsstat4 status;
   1792 
   1793 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
   1794 	    DELEGRETURN4args *, args);
   1795 
   1796 	status = rfs4_get_deleg_state(cs, &args->deleg_stateid, &dsp);
   1797 	resp->status = *cs->statusp = status;
   1798 	if (status != NFS4_OK)
   1799 		goto final;
   1800 
   1801 	/* Ensure specified filehandle matches */
   1802 	if (cs->vp != dsp->rds_finfo->rf_vp) {
   1803 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
   1804 	} else
   1805 		rfs4_return_deleg(dsp, FALSE);
   1806 
   1807 	rfs4_update_lease(dsp->rds_client);
   1808 
   1809 	rfs4_deleg_state_rele(dsp);
   1810 
   1811 final:
   1812 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
   1813 	    DELEGRETURN4res *, resp);
   1814 }
   1815 
   1816 
   1817 
   1818 /* ARGSUSED */
   1819 static void
   1820 mds_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1821 	compound_state_t *cs)
   1822 {
   1823 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
   1824 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   1825 	struct nfs4_svgetit_arg sarg;
   1826 	struct statvfs64 sb;
   1827 	nfsstat4 status;
   1828 
   1829 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
   1830 	    GETATTR4args *, args);
   1831 
   1832 	if (cs->vp == NULL) {
   1833 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1834 		goto final;
   1835 	}
   1836 
   1837 	if (cs->access == CS_ACCESS_DENIED) {
   1838 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1839 		goto final;
   1840 	}
   1841 
   1842 	sarg.sbp = &sb;
   1843 	sarg.cs = cs;
   1844 
   1845 	status = attrmap4_to_vattrmask(&args->attr_request, &sarg);
   1846 	if (status == NFS4_OK) {
   1847 		status = bitmap4_get_sysattrs(&sarg);
   1848 		if (status == NFS4_OK)
   1849 			status = do_rfs4_op_getattr(&args->attr_request,
   1850 			    &resp->obj_attributes, &sarg);
   1851 	}
   1852 	*cs->statusp = resp->status = status;
   1853 
   1854 final:
   1855 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
   1856 	    GETATTR4res *, resp);
   1857 }
   1858 
/*
 * Cleanup hook for a GETATTR result.  It does nothing version-specific:
 * the result is handed straight to rfs4_op_getattr_free().  The compound
 * state argument is not used.
 */
/*ARGSUSED*/
void
mds_op_getattr_free(nfs_resop4 *resop, compound_state_t *cs)
{
	/* Common function for NFSv4.0 and NFSv4.1 */
	rfs4_op_getattr_free(resop);
}
   1866 
   1867 /* ARGSUSED */
   1868 static void
   1869 mds_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1870 	compound_state_t *cs)
   1871 {
   1872 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   1873 
   1874 	DTRACE_NFSV4_1(op__getfh__start,
   1875 	    struct compound_state *, cs);
   1876 
   1877 	if (cs->vp == NULL) {
   1878 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1879 		goto final;
   1880 	}
   1881 	if (cs->access == CS_ACCESS_DENIED) {
   1882 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1883 		goto final;
   1884 	}
   1885 
   1886 	resp->object.nfs_fh4_val =
   1887 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
   1888 	nfs_fh4_copy(&cs->fh, &resp->object);
   1889 	*cs->statusp = resp->status = NFS4_OK;
   1890 
   1891 final:
   1892 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
   1893 	    GETFH4res *, resp);
   1894 }
   1895 
/*
 * Cleanup hook for a GETFH result.  It does nothing version-specific: the
 * result is handed straight to rfs4_op_getfh_free().  The compound state
 * argument is not used.
 */
/*ARGSUSED*/
static void
mds_op_getfh_free(nfs_resop4 *resop, compound_state_t *cs)
{
	/* Common function for NFSv4.0 and NFSv4.1 */
	rfs4_op_getfh_free(resop);
}
   1903 
   1904 /*
   1905  * link: args: SAVED_FH: file, CURRENT_FH: target directory
   1906  *	 res: status. If success - CURRENT_FH unchanged, return change_info
   1907  */
   1908 /* ARGSUSED */
   1909 static void
   1910 mds_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1911 	compound_state_t *cs)
   1912 {
   1913 	LINK4args *args = &argop->nfs_argop4_u.oplink;
   1914 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
   1915 	int error;
   1916 	vnode_t *vp;
   1917 	vnode_t *dvp;
   1918 	struct vattr bdva, idva, adva;
   1919 	char *nm;
   1920 	uint_t  len;
   1921 	caller_context_t ct;
   1922 
   1923 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
   1924 	    LINK4args *, args);
   1925 
   1926 	/* SAVED_FH: source object */
   1927 	vp = cs->saved_vp;
   1928 	if (vp == NULL) {
   1929 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1930 		goto final;
   1931 	}
   1932 
   1933 	/* CURRENT_FH: target directory */
   1934 	dvp = cs->vp;
   1935 	if (dvp == NULL) {
   1936 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1937 		goto final;
   1938 	}
   1939 
   1940 	/*
   1941 	 * If there is a non-shared filesystem mounted on this vnode,
   1942 	 * do not allow to link any file in this directory.
   1943 	 */
   1944 	if (vn_ismntpt(dvp)) {
   1945 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1946 		goto final;
   1947 	}
   1948 
   1949 	if (cs->access == CS_ACCESS_DENIED) {
   1950 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1951 		goto final;
   1952 	}
   1953 
   1954 	/* Check source object's type validity */
   1955 	if (vp->v_type == VDIR) {
   1956 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   1957 		goto final;
   1958 	}
   1959 
   1960 	/* Check target directory's type */
   1961 	if (dvp->v_type != VDIR) {
   1962 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1963 		goto final;
   1964 	}
   1965 
   1966 	if (cs->saved_exi != cs->exi) {
   1967 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   1968 		goto final;
   1969 	}
   1970 
   1971 	if (!utf8_dir_verify(&args->newname)) {
   1972 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1973 		goto final;
   1974 	}
   1975 
   1976 	nm = utf8_to_fn(&args->newname, &len, NULL);
   1977 	if (nm == NULL) {
   1978 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1979 		goto final;
   1980 	}
   1981 
   1982 	if (len > MAXNAMELEN) {
   1983 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1984 		kmem_free(nm, len);
   1985 		goto final;
   1986 	}
   1987 
   1988 	if (rdonly4(cs->exi, cs->vp, req)) {
   1989 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1990 		kmem_free(nm, len);
   1991 		goto final;
   1992 	}
   1993 
   1994 	ct.cc_sysid = 0;
   1995 	ct.cc_pid = 0;
   1996 	ct.cc_caller_id = cs->instp->caller_id;
   1997 	ct.cc_flags = CC_DONTBLOCK;
   1998 
   1999 	/* Get "before" change value */
   2000 	bdva.va_mask = AT_CTIME|AT_SEQ;
   2001 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct);
   2002 	if (error) {
   2003 		*cs->statusp = resp->status = puterrno4(error);
   2004 		kmem_free(nm, len);
   2005 		goto final;
   2006 	}
   2007 
   2008 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   2009 
   2010 	error = VOP_LINK(dvp, vp, nm, cs->cr, &ct, 0);
   2011 
   2012 	kmem_free(nm, len);
   2013 
   2014 	/*
   2015 	 * Get the initial "after" sequence number, if it fails, set to zero
   2016 	 */
   2017 	idva.va_mask = AT_SEQ;
   2018 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct))
   2019 		idva.va_seq = 0;
   2020 
   2021 	/*
   2022 	 * Force modified data and metadata out to stable storage.
   2023 	 */
   2024 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, &ct);
   2025 	(void) VOP_FSYNC(dvp, 0, cs->cr, &ct);
   2026 
   2027 	if (error) {
   2028 		*cs->statusp = resp->status = puterrno4(error);
   2029 		goto final;
   2030 	}
   2031 
   2032 	/*
   2033 	 * Get "after" change value, if it fails, simply return the
   2034 	 * before value.
   2035 	 */
   2036 	adva.va_mask = AT_CTIME|AT_SEQ;
   2037 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) {
   2038 		adva.va_ctime = bdva.va_ctime;
   2039 		adva.va_seq = 0;
   2040 	}
   2041 
   2042 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   2043 
   2044 	/*
   2045 	 * The cinfo.atomic = TRUE only if we have
   2046 	 * non-zero va_seq's, and it has incremented by exactly one
   2047 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
   2048 	 */
   2049 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   2050 	    idva.va_seq == (bdva.va_seq + 1) &&
   2051 	    idva.va_seq == adva.va_seq)
   2052 		resp->cinfo.atomic = TRUE;
   2053 	else
   2054 		resp->cinfo.atomic = FALSE;
   2055 
   2056 	*cs->statusp = resp->status = NFS4_OK;
   2057 
   2058 final:
   2059 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
   2060 	    LINK4res *, resp);
   2061 }
   2062 
   2063 /*
   2064  * Used by mds_op_lookup and mds_op_lookupp to do the actual work.
   2065  */
   2066 
   2067 /* ARGSUSED */
   2068 static nfsstat4
   2069 mds_do_lookup(char *nm, uint_t buflen, struct svc_req *req,
   2070 	struct compound_state *cs)
   2071 {
   2072 	int error;
   2073 	int different_export = 0;
   2074 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
   2075 	struct exportinfo *exi = NULL, *pre_exi = NULL;
   2076 	nfsstat4 stat;
   2077 	fid_t fid;
   2078 	int attrdir, dotdot, walk;
   2079 	bool_t is_newvp = FALSE;
   2080 	caller_context_t ct;
   2081 	nfs41_fh_fmt_t *fhp;
   2082 
   2083 	fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val;
   2084 
   2085 	attrdir = ((cs->vp->v_flag & V_XATTRDIR) == V_XATTRDIR)
   2086 	    ? FH41_ATTRDIR : 0;
   2087 
   2088 	ASSERT(FH41_GET_FLAG(fhp, FH41_ATTRDIR) == attrdir);
   2089 
   2090 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
   2091 
   2092 	/*
   2093 	 * If dotdotting, then need to check whether it's
   2094 	 * above the root of a filesystem, or above an
   2095 	 * export point.
   2096 	 */
   2097 	if (dotdot) {
   2098 
   2099 		/*
   2100 		 * If dotdotting at the root of a filesystem, then
   2101 		 * need to traverse back to the mounted-on filesystem
   2102 		 * and do the dotdot lookup there.
   2103 		 */
   2104 		if (cs->vp->v_flag & VROOT) {
   2105 
   2106 			/*
   2107 			 * If at the system root, then can
   2108 			 * go up no further.
   2109 			 */
   2110 			if (VN_CMP(cs->vp, rootdir))
   2111 				return (puterrno4(ENOENT));
   2112 
   2113 			/*
   2114 			 * Traverse back to the mounted-on filesystem
   2115 			 */
   2116 			cs->vp = untraverse(cs->vp);
   2117 
   2118 			/*
   2119 			 * Set the different_export flag so we remember
   2120 			 * to pick up a new exportinfo entry for
   2121 			 * this new filesystem.
   2122 			 */
   2123 			different_export = 1;
   2124 		} else {
   2125 
   2126 			/*
   2127 			 * If dotdotting above an export point then set
   2128 			 * the different_export to get new export info.
   2129 			 */
   2130 			different_export = nfs_exported(cs->exi, cs->vp);
   2131 		}
   2132 	}
   2133 
   2134 	ct.cc_sysid = 0;
   2135 	ct.cc_pid = 0;
   2136 	ct.cc_caller_id = cs->instp->caller_id;
   2137 	ct.cc_flags = CC_DONTBLOCK;
   2138 
   2139 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
   2140 	    &ct, 0, NULL);
   2141 	if (error)
   2142 		return (puterrno4(error));
   2143 
   2144 	/*
   2145 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
   2146 	 *
   2147 	 * XXX if the vnode is a symlink and it is not visible in
   2148 	 * a pseudo filesystem, return ENOENT (not following symlink).
   2149 	 * V4 client can not mount such symlink.
   2150 	 *
   2151 	 * In the same exported filesystem, if the security flavor used
   2152 	 * is not an explicitly shared flavor, limit the view to the visible
   2153 	 * list entries only. This is not a WRONGSEC case because it's already
   2154 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
   2155 	 */
   2156 	if (!different_export &&
   2157 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
   2158 	    cs->access & CS_ACCESS_LIMITED)) {
   2159 		if (! nfs_visible(cs->exi, vp, &different_export)) {
   2160 			VN_RELE(vp);
   2161 			return (puterrno4(ENOENT));
   2162 		}
   2163 	}
   2164 
   2165 	/*
   2166 	 * If it's a mountpoint, then traverse it.
   2167 	 */
   2168 	if (vn_ismntpt(vp)) {
   2169 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
   2170 		pre_tvp = vp;		/* save pre-traversed vnode	*/
   2171 
   2172 		/*
   2173 		 * hold pre_tvp to counteract rele by traverse.  We will
   2174 		 * need pre_tvp below if checkexport4 fails
   2175 		 */
   2176 		VN_HOLD(pre_tvp);
   2177 		tvp = vp;
   2178 		if ((error = traverse(&tvp)) != 0) {
   2179 			VN_RELE(vp);
   2180 			VN_RELE(pre_tvp);
   2181 			return (puterrno4(error));
   2182 		}
   2183 		vp = tvp;
   2184 		different_export = 1;
   2185 
   2186 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
   2187 		/*
   2188 		 * The vfsp comparison is to handle the case where
   2189 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
   2190 		 * and NFS is unaware of local fs transistions because
   2191 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
   2192 		 * the dir and the obj returned by lookup will have different
   2193 		 * vfs ptrs.
   2194 		 */
   2195 		different_export = 1;
   2196 	}
   2197 
   2198 	if (different_export) {
   2199 		bzero(&fid, sizeof (fid));
   2200 		fid.fid_len = MAXFIDSZ;
   2201 		error = vop_fid_pseudo(vp, &fid);
   2202 		if (error) {
   2203 			VN_RELE(vp);
   2204 			if (pre_tvp)
   2205 				VN_RELE(pre_tvp);
   2206 			return (puterrno4(error));
   2207 		}
   2208 
   2209 		if (dotdot)
   2210 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
   2211 		else
   2212 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
   2213 
   2214 		if (exi == NULL) {
   2215 			if (pre_tvp) {
   2216 				/*
   2217 				 * If this vnode is a mounted-on vnode,
   2218 				 * but the mounted-on file system is not
   2219 				 * exported, send back the filehandle for
   2220 				 * the mounted-on vnode, not the root of
   2221 				 * the mounted-on file system.
   2222 				 */
   2223 				VN_RELE(vp);
   2224 				vp = pre_tvp;
   2225 				exi = pre_exi;
   2226 			} else {
   2227 				VN_RELE(vp);
   2228 				return (puterrno4(EACCES));
   2229 			}
   2230 		} else if (pre_tvp) {
   2231 			/* we're done with pre_tvp now. release extra hold */
   2232 			VN_RELE(pre_tvp);
   2233 		}
   2234 
   2235 		cs->exi = exi;
   2236 
   2237 		/*
   2238 		 * Now do a checkauth4.
   2239 		 *
   2240 		 * Checking here since the client/principle may not have
   2241 		 * access to the cs->exi exported file system.
   2242 		 *
   2243 		 * If the client has access we also need to validate
   2244 		 * the principle since it may have been re-mapped.
   2245 		 *
   2246 		 * We start with a new credential as a previous call to
   2247 		 * checkauth4(), via a PUT*FH operation, wrote over cs->cr.
   2248 		 */
   2249 		crfree(cs->cr);
   2250 		cs->cr = crdup(cs->basecr);
   2251 
   2252 		if (cs->vp)
   2253 			oldvp = cs->vp;
   2254 		cs->vp = vp;
   2255 		is_newvp = TRUE;
   2256 
   2257 		stat = call_checkauth4(cs, req);
   2258 		if (stat != NFS4_OK) {
   2259 			VN_RELE(cs->vp);
   2260 			cs->vp = oldvp;
   2261 			return (stat);
   2262 		}
   2263 	}
   2264 
   2265 	/*
   2266 	 * After various NFS checks, do a label check on the path
   2267 	 * component. The label on this path should either be the
   2268 	 * global zone's label or a zone's label. We are only
   2269 	 * interested in the zone's label because exported files
   2270 	 * in global zone is accessible (though read-only) to
   2271 	 * clients. The exportability/visibility check is already
   2272 	 * done before reaching this code.
   2273 	 */
   2274 	if (is_system_labeled()) {
   2275 		bslabel_t *clabel;
   2276 
   2277 		ASSERT(req->rq_label != NULL);
   2278 		clabel = req->rq_label;
   2279 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
   2280 		    "got client label from request(1)", struct svc_req *, req);
   2281 
   2282 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   2283 			if (!do_rfs_label_check(clabel, vp,
   2284 			    DOMINANCE_CHECK, cs->exi)) {
   2285 				error = EACCES;
   2286 				goto err_out;
   2287 			}
   2288 		} else {
   2289 			/*
   2290 			 * We grant access to admin_low label clients
   2291 			 * only if the client is trusted, i.e. also
   2292 			 * running Solaris Trusted Extension.
   2293 			 */
   2294 			struct sockaddr	*ca;
   2295 			int		addr_type;
   2296 			void		*ipaddr;
   2297 			tsol_tpc_t	*tp;
   2298 
   2299 			ca = (struct sockaddr *)svc_getrpccaller(
   2300 			    req->rq_xprt)->buf;
   2301 			if (ca->sa_family == AF_INET) {
   2302 				addr_type = IPV4_VERSION;
   2303 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
   2304 			} else if (ca->sa_family == AF_INET6) {
   2305 				addr_type = IPV6_VERSION;
   2306 				ipaddr = &((struct sockaddr_in6 *)
   2307 				    ca)->sin6_addr;
   2308 			}
   2309 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
   2310 			if (tp == NULL || tp->tpc_tp.tp_doi !=
   2311 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
   2312 			    SUN_CIPSO) {
   2313 				error = EACCES;
   2314 				goto err_out;
   2315 			}
   2316 		}
   2317 	}
   2318 
   2319 	error = mknfs41_fh(&cs->fh, vp, cs->exi);
   2320 
   2321 err_out:
   2322 	if (error) {
   2323 		if (is_newvp) {
   2324 			VN_RELE(cs->vp);
   2325 			cs->vp = oldvp;
   2326 		} else
   2327 			VN_RELE(vp);
   2328 		return (puterrno4(error));
   2329 	}
   2330 
   2331 	if (!is_newvp) {
   2332 		if (cs->vp)
   2333 			VN_RELE(cs->vp);
   2334 		cs->vp = vp;
   2335 	} else if (oldvp)
   2336 		VN_RELE(oldvp);
   2337 
   2338 	/*
   2339 	 * if did lookup on attrdir and didn't lookup .., set named
   2340 	 * attr fh flag
   2341 	 */
   2342 	if (attrdir && ! dotdot)
   2343 		FH41_SET_FLAG(fhp, FH41_NAMEDATTR);
   2344 
   2345 	/* Assume false for now, open proc will set this */
   2346 	cs->mandlock = FALSE;
   2347 
   2348 	return (NFS4_OK);
   2349 }
   2350 
   2351 /* ARGSUSED */
   2352 static void
   2353 mds_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2354 	compound_state_t *cs)
   2355 {
   2356 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
   2357 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
   2358 	char *nm;
   2359 	uint_t len;
   2360 
   2361 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
   2362 	    LOOKUP4args *, args);
   2363 
   2364 	if (cs->vp == NULL) {
   2365 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2366 		goto final;
   2367 	}
   2368 
   2369 	if (cs->vp->v_type == VLNK) {
   2370 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
   2371 		goto final;
   2372 	}
   2373 
   2374 	if (cs->vp->v_type != VDIR) {
   2375 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2376 		goto final;
   2377 	}
   2378 
   2379 	if (!utf8_dir_verify(&args->objname)) {
   2380 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2381 		goto final;
   2382 	}
   2383 
   2384 	nm = utf8_to_str(&args->objname, &len, NULL);
   2385 	if (nm == NULL) {
   2386 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2387 		goto final;
   2388 	}
   2389 
   2390 	if (len > MAXNAMELEN) {
   2391 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2392 		kmem_free(nm, len);
   2393 		goto final;
   2394 	}
   2395 
   2396 	*cs->statusp = resp->status = mds_do_lookup(nm, len, req, cs);
   2397 
   2398 	kmem_free(nm, len);
   2399 
   2400 final:
   2401 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
   2402 	    LOOKUP4res *, resp);
   2403 }
   2404 
   2405 /* ARGSUSED */
   2406 static void
   2407 mds_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2408 	compound_state_t *cs)
   2409 {
   2410 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
   2411 
   2412 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
   2413 
   2414 	if (cs->vp == NULL) {
   2415 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2416 		goto final;
   2417 	}
   2418 
   2419 	if (cs->vp->v_type != VDIR) {
   2420 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2421 		goto final;
   2422 	}
   2423 
   2424 	*cs->statusp = resp->status = mds_do_lookup("..", 3, req, cs);
   2425 
   2426 	/*
   2427 	 * From NFSV4 Specification, LOOKUPP should not check for
   2428 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
   2429 	 */
   2430 	if (resp->status == NFS4ERR_WRONGSEC) {
   2431 		*cs->statusp = resp->status = NFS4_OK;
   2432 	}
   2433 
   2434 final:
   2435 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
   2436 	    LOOKUPP4res *, resp);
   2437 }
   2438 
   2439 
   2440 /*ARGSUSED2*/
   2441 static void
   2442 mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2443 	compound_state_t *cs)
   2444 {
   2445 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
   2446 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
   2447 	vnode_t		*avp = NULL;
   2448 	int		lookup_flags = LOOKUP_XATTR, error;
   2449 	int		exp_ro = 0;
   2450 	caller_context_t ct;
   2451 
   2452 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
   2453 	    OPENATTR4args *, args);
   2454 
   2455 	if (cs->vp == NULL) {
   2456 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2457 		goto final;
   2458 	}
   2459 
   2460 	/*
   2461 	 * Make a couple of checks made by copen()
   2462 	 *
   2463 	 * Check to make sure underlying fs supports xattrs.  This
   2464 	 * is required because solaris filesystem implementations
   2465 	 * (UFS/TMPFS) don't enforce the noxattr mount option
   2466 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
   2467 	 * pathconf cmd or if fs supports cmd but doesn't claim
   2468 	 * support for xattr, return NOTSUPP.  It would be better
   2469 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
   2470 	 * that cmd is not available to VOP_PATHCONF interface
   2471 	 * (it's only implemented inside pathconf syscall)...
   2472 	 *
   2473 	 * Verify permission to put attributes on files (access
   2474 	 * checks from copen).
   2475 	 */
   2476 
   2477 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
   2478 		error = ENOTSUP;
   2479 		goto error_out;
   2480 	}
   2481 
   2482 	ct.cc_sysid = 0;
   2483 	ct.cc_pid = 0;
   2484 	ct.cc_caller_id = cs->instp->caller_id;
   2485 	ct.cc_flags = CC_DONTBLOCK;
   2486 
   2487 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, &ct) != 0) &&
   2488 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, &ct) != 0) &&
   2489 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, &ct) != 0)) {
   2490 		error = EACCES;
   2491 		goto error_out;
   2492 	}
   2493 
   2494 	/*
   2495 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
   2496 	 * the file system is exported read-only -- regardless of
   2497 	 * createdir flag.  Otherwise the attrdir would be created
   2498 	 * (assuming server fs isn't mounted readonly locally).  If
   2499 	 * VOP_LOOKUP returns ENOENT in this case, the error will
   2500 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
   2501 	 * because specfs has no VOP_LOOKUP op, so the macro would
   2502 	 * return ENOSYS.  EINVAL is returned by all (current)
   2503 	 * Solaris file system implementations when any of their
   2504 	 * restrictions are violated (xattr(dir) can't have xattrdir).
   2505 	 * Returning NOTSUPP is more appropriate in this case
   2506 	 * because the object will never be able to have an attrdir.
   2507 	 */
   2508 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
   2509 		lookup_flags |= CREATE_XATTR_DIR;
   2510 
   2511 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL,
   2512 	    cs->cr, &ct, 0, NULL);
   2513 
   2514 	if (error) {
   2515 		if (error == ENOENT && args->createdir && exp_ro)
   2516 			error = EROFS;
   2517 		else if (error == EINVAL || error == ENOSYS)
   2518 			error = ENOTSUP;
   2519 		goto error_out;
   2520 	}
   2521 
   2522 	ASSERT(avp->v_flag & V_XATTRDIR);
   2523 
   2524 	error = mknfs41_fh(&cs->fh, avp, cs->exi);
   2525 
   2526 	if (error) {
   2527 		VN_RELE(avp);
   2528 		goto error_out;
   2529 	}
   2530 
   2531 	VN_RELE(cs->vp);
   2532 	cs->vp = avp;
   2533 
   2534 	/*
   2535 	 * There is no requirement for an attrdir fh flag
   2536 	 * because the attrdir has a vnode flag to distinguish
   2537 	 * it from regular (non-xattr) directories.  The
   2538 	 * FH41_ATTRDIR flag is set for future sanity checks.
   2539 	 */
   2540 	FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val, FH41_ATTRDIR);
   2541 	*cs->statusp = resp->status = NFS4_OK;
   2542 	goto final;
   2543 
   2544 error_out:
   2545 
   2546 	*cs->statusp = resp->status = puterrno4(error);
   2547 
   2548 final:
   2549 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
   2550 	    OPENATTR4res *, resp);
   2551 }
   2552 
   2553 static int
   2554 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
   2555     caller_context_t *ct)
   2556 {
   2557 	int error;
   2558 	int i;
   2559 	clock_t delaytime;
   2560 
   2561 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
   2562 
   2563 	/*
   2564 	 * Don't block on mandatory locks. If this routine returns
   2565 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
   2566 	 */
   2567 	uio->uio_fmode = FNONBLOCK;
   2568 
   2569 	for (i = 0; i < rfs4_maxlock_tries; i++) {
   2570 		if (direction == FREAD) {
   2571 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
   2572 			error = VOP_READ(vp, uio, ioflag, cred, ct);
   2573 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
   2574 		} else {
   2575 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
   2576 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
   2577 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
   2578 		}
   2579 
   2580 		if (error != EAGAIN)
   2581 			break;
   2582 
   2583 		if (i < rfs4_maxlock_tries - 1) {
   2584 			delay(delaytime);
   2585 			delaytime *= 2;
   2586 		}
   2587 	}
   2588 
   2589 	return (error);
   2590 }
   2591 
   2592 /* ARGSUSED */
   2593 static void
   2594 mds_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2595 	compound_state_t *cs)
   2596 {
   2597 	READ4args *args = &argop->nfs_argop4_u.opread;
   2598 	READ4res *resp = &resop->nfs_resop4_u.opread;
   2599 	int error;
   2600 	nnode_t *nn = NULL;
   2601 	struct iovec iov;
   2602 	struct uio uio;
   2603 	bool_t *deleg = &cs->deleg;
   2604 	nfsstat4 stat;
   2605 	mblk_t *mp;
   2606 	int alloc_err = 0;
   2607 	caller_context_t ct;
   2608 	uint32_t nnioflags = 0;
   2609 
   2610 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
   2611 	    READ4args, args);
   2612 
   2613 	nn = cs->nn;
   2614 	if (nn == NULL) {
   2615 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2616 		goto final;
   2617 	}
   2618 	if (cs->access == CS_ACCESS_DENIED) {
   2619 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2620 		goto final;
   2621 	}
   2622 
   2623 	if ((stat = nnop_check_stateid(nn, cs, FREAD, &args->stateid,
   2624 	    FALSE, deleg, TRUE, &ct, NULL)) != NFS4_OK) {
   2625 		*cs->statusp = resp->status = stat;
   2626 		goto final;
   2627 	}
   2628 
   2629 	error = nnop_io_prep(nn, &nnioflags, cs->cr, &ct, args->offset,
   2630 	    args->count, NULL);
   2631 	if (error != 0) {
   2632 		*cs->statusp = resp->status = nnode_stat4(error, 1);
   2633 		goto out;
   2634 	}
   2635 
   2636 	if (nnioflags & NNODE_IO_FLAG_PAST_EOF) {
   2637 		*cs->statusp = resp->status = NFS4_OK;
   2638 		resp->eof = TRUE;
   2639 		resp->data_len = 0;
   2640 		resp->data_val = NULL;
   2641 		resp->mblk = NULL;
   2642 		*cs->statusp = resp->status = NFS4_OK;
   2643 		goto out;
   2644 	}
   2645 
   2646 	if (args->count == 0) {
   2647 		*cs->statusp = resp->status = NFS4_OK;
   2648 		resp->eof = FALSE;
   2649 		resp->data_len = 0;
   2650 		resp->data_val = NULL;
   2651 		resp->mblk = NULL;
   2652 		goto out;
   2653 	}
   2654 
   2655 	/*
   2656 	 * Do not allocate memory more than maximum allowed
   2657 	 * transfer size
   2658 	 */
   2659 	if (args->count > rfs4_tsize(req))
   2660 		args->count = rfs4_tsize(req);
   2661 
   2662 	if (args->wlist) {
   2663 		mp = NULL;
   2664 		(void) rdma_get_wchunk(req, &iov, args->wlist);
   2665 	} else {
   2666 		/*
   2667 		 * mp will contain the data to be sent out in the read reply.
   2668 		 * It will be freed after the reply has been sent.
   2669 		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
   2670 		 * so that the call to xdrmblk_putmblk() never fails.
   2671 		 * If the first alloc of the requested size fails, then
   2672 		 * decrease the size to something more reasonable and wait
   2673 		 * for the allocation to occur.
   2674 		 */
   2675 		mp = allocb(RNDUP(args->count), BPRI_MED);
   2676 		if (mp == NULL) {
   2677 			if (args->count > MAXBSIZE)
   2678 				args->count = MAXBSIZE;
   2679 			mp = allocb_wait(RNDUP(args->count), BPRI_MED,
   2680 			    STR_NOSIG, &alloc_err);
   2681 		}
   2682 		ASSERT(mp != NULL);
   2683 		ASSERT(alloc_err == 0);
   2684 
   2685 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
   2686 		iov.iov_len = args->count;
   2687 	}
   2688 
   2689 	uio.uio_iov = &iov;
   2690 	uio.uio_iovcnt = 1;
   2691 	uio.uio_segflg = UIO_SYSSPACE;
   2692 	uio.uio_extflg = UIO_COPY_CACHED;
   2693 	uio.uio_loffset = args->offset;
   2694 	uio.uio_resid = args->count;
   2695 
   2696 	error = nnop_read(nn, &nnioflags, cs->cr, &ct, &uio, 0);
   2697 	if (error) {
   2698 		if (mp != NULL)
   2699 			freeb(mp);
   2700 		*cs->statusp = resp->status = nnode_stat4(error, 1);
   2701 		goto out;
   2702 	}
   2703 
   2704 	*cs->statusp = resp->status = NFS4_OK;
   2705 
   2706 	ASSERT(uio.uio_resid >= 0);
   2707 	resp->data_len = args->count - uio.uio_resid;
   2708 	resp->data_val = (char *)mp->b_datap->db_base;
   2709 	resp->mblk = mp;
   2710 
   2711 	resp->eof = (nnioflags & NNODE_IO_FLAG_EOF) ? TRUE : FALSE;
   2712 
   2713 out:
   2714 	nnop_io_release(nn, nnioflags, &ct);
   2715 
   2716 final:
   2717 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
   2718 	    READ4res *, resp);
   2719 }
   2720 
   2721 /*ARGSUSED*/
   2722 static void
   2723 mds_op_read_free(nfs_resop4 *resop, compound_state_t *cs)
   2724 {
   2725 	/* Common function for NFSv4.0 and NFSv4.1 */
   2726 	rfs4_op_read_free(resop);
   2727 }
   2728 
   2729 /* ARGSUSED */
   2730 static void
   2731 mds_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2732 	compound_state_t *cs)
   2733 {
   2734 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
   2735 	int error;
   2736 	vnode_t *vp;
   2737 	struct exportinfo *exi, *sav_exi;
   2738 	nfs41_fh_fmt_t *fhp;
   2739 	fid_t exp_fid;
   2740 
   2741 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
   2742 
   2743 	if (cs->vp) {
   2744 		VN_RELE(cs->vp);
   2745 		cs->vp = NULL;
   2746 	}
   2747 
   2748 	if (cs->cr)
   2749 		crfree(cs->cr);
   2750 
   2751 	cs->cr = crdup(cs->basecr);
   2752 
   2753 	vp = exi_public->exi_vp;
   2754 	if (vp == NULL) {
   2755 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   2756 		goto final;
   2757 	}
   2758 
   2759 	error = mknfs41_fh(&cs->fh, vp, exi_public);
   2760 	if (error != 0) {
   2761 		*cs->statusp = resp->status = puterrno4(error);
   2762 		goto final;
   2763 	}
   2764 	sav_exi = cs->exi;
   2765 	if (exi_public == exi_root) {
   2766 		/*
   2767 		 * No filesystem is actually shared public, so we default
   2768 		 * to exi_root. In this case, we must check whether root
   2769 		 * is exported.
   2770 		 */
   2771 		fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val;
   2772 
   2773 		exp_fid.fid_len = fhp->fh.v1.export_fid.len;
   2774 
   2775 		bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data,
   2776 		    exp_fid.fid_len);
   2777 
   2778 		/*
   2779 		 * if root filesystem is exported, the exportinfo struct that we
   2780 		 * should use is what checkexport4 returns, because root_exi is
   2781 		 * actually a mostly empty struct.
   2782 		 */
   2783 		exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL);
   2784 		cs->exi = ((exi != NULL) ? exi : exi_public);
   2785 	} else {
   2786 		/*
   2787 		 * it's a properly shared filesystem
   2788 		 */
   2789 		cs->exi = exi_public;
   2790 	}
   2791 
   2792 	VN_HOLD(vp);
   2793 	cs->vp = vp;
   2794 
   2795 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2796 		VN_RELE(cs->vp);
   2797 		cs->vp = NULL;
   2798 		cs->exi = sav_exi;
   2799 		goto final;
   2800 	}
   2801 
   2802 	*cs->statusp = resp->status = NFS4_OK;
   2803 
   2804 final:
   2805 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
   2806 	    PUTPUBFH4res *, resp);
   2807 }
   2808 
   2809 /*
   2810  * XXX - issue with put*fh operations.
   2811  *
   2812  * let us assume that /export/home is shared via NFS and a NFS client
   2813  * wishes to mount /export/home/joe.
   2814  *
   2815  * If /export, home, or joe have restrictive search permissions, then
   2816  * the NFS Server should not return a filehandle to the client.
   2817  *
   2818  * This case is easy to enforce. However, the NFS Client does not know
   2819  * which security flavor should be used until the pathname has been
   2820  * fully resolved. In addition there is another complication for uid
   2821  * mapping. If the credential being used is root, the default behaviour
   2822  * will be to map it to the anonymous user. However the NFS Server can not
   2823  * map it until the pathname has been fully resolved.
   2824  *
   2825  * XXX: JEFF:  Proposed solution.
   2826  *
   2827  * Luckily, SECINFO uses a full pathname.  So what we will
   2828  * have to do in mds_op_lookup is check that flavor of
   2829  * the target object matches that of the request, and if root was the
   2830  * caller, check for the root= and anon= options, and if necessary,
   2831  * repeat the lookup using the right cred_t.
   2832  *
   2833  * But that's not done yet.
   2834  */
   2835 /* ARGSUSED */
   2836 static void
   2837 mds_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2838 	compound_state_t *cs)
   2839 {
   2840 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
   2841 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
   2842 	nfs41_fh_fmt_t *fhp = NULL;
   2843 	fid_t  exp_fid;
   2844 	int error;
   2845 
   2846 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
   2847 	    PUTFH4args *, args);
   2848 
   2849 	/*
   2850 	 * release the old nnode, vnode and cred.
   2851 	 */
   2852 	if (cs->nn)
   2853 		nnode_rele(&cs->nn);
   2854 	if (cs->vp) {
   2855 		VN_RELE(cs->vp);
   2856 		cs->vp = NULL;
   2857 	}
   2858 	if (cs->cr) {
   2859 		crfree(cs->cr);
   2860 		cs->cr = NULL;
   2861 	}
   2862 
   2863 
   2864 	/*
   2865 	 * Check exportinfo only if it's a FH41_TYPE_NFS filehandle.
   2866 	 * If the filehandle is otherwise incorrect,
   2867 	 * nnode_from_fh_v41() will return an error.
   2868 	 */
   2869 	fhp = (nfs41_fh_fmt_t *)args->object.nfs_fh4_val;
   2870 	if (fhp->type == FH41_TYPE_NFS) {
   2871 		exp_fid.fid_len = fhp->fh.v1.export_fid.len;
   2872 		bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data,
   2873 		    exp_fid.fid_len);
   2874 		cs->exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL);
   2875 		if (cs->exi == NULL) {
   2876 			*cs->statusp = resp->status = NFS4ERR_STALE;
   2877 			DTRACE_PROBE(nfss41__e__chkexp);
   2878 			goto final;
   2879 		}
   2880 	}
   2881 
   2882 	error = nnode_from_fh_v41(&cs->nn, &args->object);
   2883 	if (error != 0) {
   2884 		resp->status = *cs->statusp = nnode_stat4(error, 1);
   2885 		goto final;
   2886 	}
   2887 	ASSERT(cs->nn != NULL);
   2888 
   2889 	cs->vp = nnop_io_getvp(cs->nn);
   2890 
   2891 	cs->cr = crdup(cs->basecr);
   2892 	ASSERT(cs->cr != NULL);
   2893 
   2894 	if (fhp->type == FH41_TYPE_NFS) {
   2895 		if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2896 			nnode_rele(&cs->nn);
   2897 			VN_RELE(cs->vp);
   2898 			cs->vp = NULL;
   2899 			crfree(cs->cr);
   2900 			cs->cr = NULL;
   2901 			*cs->statusp = resp->status;
   2902 			DTRACE_PROBE(nfss41__e__fail_auth);
   2903 			goto final;
   2904 		}
   2905 	}
   2906 
   2907 	nfs_fh4_copy(&args->object, &cs->fh);
   2908 	*cs->statusp = resp->status = NFS4_OK;
   2909 	cs->deleg = FALSE;
   2910 
   2911 final:
   2912 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
   2913 	    PUTFH4res *, resp);
   2914 }
   2915 
   2916 /* ARGSUSED */
   2917 static void
   2918 mds_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2919 	compound_state_t *cs)
   2920 
   2921 {
   2922 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
   2923 	int error;
   2924 	fid_t fid;
   2925 	struct exportinfo *exi, *sav_exi;
   2926 
   2927 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
   2928 
   2929 	if (cs->vp) {
   2930 		VN_RELE(cs->vp);
   2931 		cs->vp = NULL;
   2932 	}
   2933 
   2934 	if (cs->cr)
   2935 		crfree(cs->cr);
   2936 
   2937 	cs->cr = crdup(cs->basecr);
   2938 
   2939 	/*
   2940 	 * Using rootdir, the system root vnode,
   2941 	 * get its fid.
   2942 	 */
   2943 	bzero(&fid, sizeof (fid));
   2944 	fid.fid_len = MAXFIDSZ;
   2945 	error = vop_fid_pseudo(rootdir, &fid);
   2946 	if (error != 0) {
   2947 		*cs->statusp = resp->status = puterrno4(error);
   2948 		goto final;
   2949 	}
   2950 
   2951 	/*
   2952 	 * Then use the root fsid & fid it to find out if it's exported
   2953 	 *
   2954 	 * If the server root isn't exported directly, then
   2955 	 * it should at least be a pseudo export based on
   2956 	 * one or more exports further down in the server's
   2957 	 * file tree.
   2958 	 */
   2959 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
   2960 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
   2961 		DTRACE_PROBE(nfss41__e__chkexp);
   2962 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   2963 		goto final;
   2964 	}
   2965 
   2966 	/*
   2967 	 * Now make a filehandle based on the root
   2968 	 * export and root vnode.
   2969 	 */
   2970 	error = mknfs41_fh(&cs->fh, rootdir, exi);
   2971 	if (error != 0) {
   2972 		*cs->statusp = resp->status = puterrno4(error);
   2973 		goto final;
   2974 	}
   2975 
   2976 	sav_exi = cs->exi;
   2977 	cs->exi = exi;
   2978 
   2979 	VN_HOLD(rootdir);
   2980 	cs->vp = rootdir;
   2981 
   2982 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   2983 		VN_RELE(rootdir);
   2984 		cs->vp = NULL;
   2985 		cs->exi = sav_exi;
   2986 		goto final;
   2987 	}
   2988 
   2989 	*cs->statusp = resp->status = NFS4_OK;
   2990 	cs->deleg = FALSE;
   2991 
   2992 final:
   2993 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
   2994 	    PUTROOTFH4res *, resp);
   2995 }
   2996 
   2997 /*
   2998  * A directory entry is a valid nfsv4 entry if
   2999  * - it has a non-zero ino
   3000  * - it is not a dot or dotdot name
   3001  * - it is visible in a pseudo export or in a real export that can
   3002  *   only have a limited view.
   3003  */
   3004 static bool_t
   3005 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
   3006 		int *expseudo, int check_visible)
   3007 {
   3008 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
   3009 		*expseudo = 0;
   3010 		return (FALSE);
   3011 	}
   3012 
   3013 	if (! check_visible) {
   3014 		*expseudo = 0;
   3015 		return (TRUE);
   3016 	}
   3017 
   3018 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
   3019 }
   3020 
   3021 
   3022 /*
   3023  * readlink: args: CURRENT_FH.
   3024  *	res: status. If success - CURRENT_FH unchanged, return linktext.
   3025  */
   3026 
   3027 /* ARGSUSED */
   3028 static void
   3029 mds_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3030 	compound_state_t *cs)
   3031 {
   3032 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3033 	int error;
   3034 	vnode_t *vp;
   3035 	struct iovec iov;
   3036 	struct vattr va;
   3037 	struct uio uio;
   3038 	char *data;
   3039 	caller_context_t ct;
   3040 
   3041 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
   3042 
   3043 	/* CURRENT_FH: directory */
   3044 	vp = cs->vp;
   3045 	if (vp == NULL) {
   3046 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3047 		goto final;
   3048 	}
   3049 
   3050 	if (cs->access == CS_ACCESS_DENIED) {
   3051 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3052 		goto final;
   3053 	}
   3054 
   3055 	if (vp->v_type == VDIR) {
   3056 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   3057 		goto final;
   3058 	}
   3059 
   3060 	if (vp->v_type != VLNK) {
   3061 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3062 		goto final;
   3063 	}
   3064 
   3065 	ct.cc_sysid = 0;
   3066 	ct.cc_pid = 0;
   3067 	ct.cc_caller_id = cs->instp->caller_id;
   3068 	ct.cc_flags = CC_DONTBLOCK;
   3069 
   3070 	va.va_mask = AT_MODE;
   3071 	error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3072 	if (error) {
   3073 		*cs->statusp = resp->status = puterrno4(error);
   3074 		goto final;
   3075 	}
   3076 
   3077 	if (MANDLOCK(vp, va.va_mode)) {
   3078 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3079 		goto final;
   3080 	}
   3081 
   3082 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
   3083 
   3084 	iov.iov_base = data;
   3085 	iov.iov_len = MAXPATHLEN;
   3086 	uio.uio_iov = &iov;
   3087 	uio.uio_iovcnt = 1;
   3088 	uio.uio_segflg = UIO_SYSSPACE;
   3089 	uio.uio_extflg = UIO_COPY_CACHED;
   3090 	uio.uio_loffset = 0;
   3091 	uio.uio_resid = MAXPATHLEN;
   3092 
   3093 	error = VOP_READLINK(vp, &uio, cs->cr, &ct);
   3094 
   3095 	if (error) {
   3096 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3097 		*cs->statusp = resp->status = puterrno4(error);
   3098 		goto final;
   3099 	}
   3100 
   3101 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
   3102 
   3103 	/*
   3104 	 * treat link name as data
   3105 	 */
   3106 	(void) str_to_utf8(data, &resp->link);
   3107 
   3108 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3109 	*cs->statusp = resp->status = NFS4_OK;
   3110 
   3111 final:
   3112 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
   3113 	    READLINK4res *, resp);
   3114 }
   3115 
   3116 /*ARGSUSED*/
   3117 static void
   3118 mds_op_readlink_free(nfs_resop4 *resop, compound_state_t *cs)
   3119 {
   3120 	/* Common function used for NFSv4.0 and NFSv4.1 */
   3121 	rfs4_op_readlink_free(resop);
   3122 }
   3123 
   3124 /* ARGSUSED */
   3125 static void
   3126 mds_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
   3127     struct svc_req *req, compound_state_t *cs)
   3128 {
   3129 	RECLAIM_COMPLETE4args *args = &argop->nfs_argop4_u.opreclaim_complete;
   3130 	RECLAIM_COMPLETE4res *resp = &resop->nfs_resop4_u.opreclaim_complete;
   3131 	rfs4_client_t *cp;
   3132 
   3133 	cp = cs->cp;
   3134 
   3135 	if (cp->rc_reclaim_completed) {
   3136 		*cs->statusp = resp->rcr_status = NFS4ERR_COMPLETE_ALREADY;
   3137 		return;
   3138 	}
   3139 
   3140 	if (args->rca_one_fs) {
   3141 		/* do what?  we don't track this */
   3142 		*cs->statusp = resp->rcr_status = NFS4_OK;
   3143 		return;
   3144 	}
   3145 
   3146 	cp->rc_reclaim_completed = 1;
   3147 
   3148 	/* did we have reclaimable state stored for this client? */
   3149 	if (cp->rc_can_reclaim)
   3150 		atomic_add_32(&(cs->instp->reclaim_cnt), -1);
   3151 
   3152 	*cs->statusp = resp->rcr_status = NFS4_OK;
   3153 }
   3154 
   3155 /*
   3156  * short utility function to lookup a file and recall the delegation
   3157  */
   3158 static rfs4_file_t *
   3159 mds_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
   3160 	int *lkup_error, struct compound_state *cs)
   3161 {
   3162 	vnode_t *vp;
   3163 	rfs4_file_t *fp = NULL;
   3164 	bool_t fcreate = FALSE;
   3165 	int error;
   3166 
   3167 	if (vpp)
   3168 		*vpp = NULL;
   3169 
   3170 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
   3171 	    NULL, 0, NULL)) == 0) {
   3172 		if (vp->v_type == VREG)
   3173 			fp = rfs4_findfile(cs->instp, vp, NULL, &fcreate);
   3174 		if (vpp)
   3175 			*vpp = vp;
   3176 		else
   3177 			VN_RELE(vp);
   3178 	}
   3179 
   3180 	if (lkup_error)
   3181 		*lkup_error = error;
   3182 
   3183 	return (fp);
   3184 }
   3185 
   3186 static int
   3187 do_ctl_mds_remove(vnode_t *vp, rfs4_file_t *fp, compound_state_t *cs)
   3188 {
   3189 	fid_t fid;
   3190 	nfs41_fid_t nfs41_fid;
   3191 	int error = 0;
   3192 
   3193 	/*
   3194 	 * Use the file layout to determine which data servers to
   3195 	 * send DS_REMOVEs to.  If the layout is not cached in the
   3196 	 * rfs4_file_t either this means that we do not have a layout
   3197 	 * or it needs to be read in from disk.  Right now, we do not
   3198 	 * attempt to read the layout in from disk, but future phases
   3199 	 * of REMOVE handling will take this into consideration.
   3200 	 *
   3201 	 * Known Problems with this implementation of REMOVE:
   3202 	 * 1. Not attempting to read a layout from disk could mean
   3203 	 * that if an on-disk layout did exist, storage on the data
   3204 	 * servers will not be freed.
   3205 	 *
   3206 	 * 2. The server populates the layout stored in the rfs4_file_t
   3207 	 * when it receives a LAYOUTGET.  If the file has been written
   3208 	 * (perhaps in a past server instance), but no clients have
   3209 	 * issued new LAYOUTGETs, we will not have a cached layout and
   3210 	 * we will not free space on the data servers.
   3211 	 *
   3212 	 * 3. If any of the DS_REMOVE calls to the data servers fail
   3213 	 * the errors are ignored and will not be retried.  This may
   3214 	 * cause leaked space on the the data server.
   3215 	 */
   3216 	if (fp->rf_mlo != NULL) {
   3217 		bzero(&fid, sizeof (fid));
   3218 		fid.fid_len = MAXFIDSZ;
   3219 
   3220 		error = vop_fid_pseudo(vp, &fid);
   3221 		if (error) {
   3222 			DTRACE_NFSV4_1(nfss__e__vop_fid_pseudo_failed,
   3223 			    int, error);
   3224 			return (error);
   3225 		} else {
   3226 			nfs41_fid.len = fid.fid_len;
   3227 			bcopy(fid.fid_data, nfs41_fid.val, nfs41_fid.len);
   3228 		}
   3229 
   3230 		error = ctl_mds_clnt_remove_file(cs->instp, cs->exi->exi_fsid,
   3231 		    nfs41_fid, fp->rf_mlo);
   3232 	} else
   3233 		DTRACE_PROBE(nfss__i__layout_is_null_cannot_remove);
   3234 
   3235 	return (error);
   3236 }
   3237 
   3238 /*
   3239  * remove: args: CURRENT_FH: directory; name.
   3240  *	res: status. If success - CURRENT_FH unchanged, return change_info
   3241  *		for directory.
   3242  */
   3243 /* ARGSUSED */
   3244 static void
   3245 mds_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3246 	compound_state_t *cs)
   3247 {
   3248 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
   3249 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
   3250 	int error;
   3251 	vnode_t *dvp, *vp;
   3252 	struct vattr bdva, idva, adva;
   3253 	char *nm;
   3254 	uint_t len;
   3255 	rfs4_file_t *fp;
   3256 	int in_crit = 0;
   3257 	bslabel_t *clabel;
   3258 	caller_context_t ct;
   3259 
   3260 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
   3261 	    REMOVE4args *, args);
   3262 
   3263 	/* CURRENT_FH: directory */
   3264 	dvp = cs->vp;
   3265 	if (dvp == NULL) {
   3266 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3267 		goto final;
   3268 	}
   3269 
   3270 	if (cs->access == CS_ACCESS_DENIED) {
   3271 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3272 		goto final;
   3273 	}
   3274 
   3275 	/*
   3276 	 * If there is an unshared filesystem mounted on this vnode,
   3277 	 * Do not allow to remove anything in this directory.
   3278 	 */
   3279 	if (vn_ismntpt(dvp)) {
   3280 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3281 		goto final;
   3282 	}
   3283 
   3284 	if (dvp->v_type != VDIR) {
   3285 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   3286 		goto final;
   3287 	}
   3288 
   3289 	if (!utf8_dir_verify(&args->target)) {
   3290 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3291 		goto final;
   3292 	}
   3293 
   3294 	/*
   3295 	 * Lookup the file so that we can check if it's a directory
   3296 	 */
   3297 	nm = utf8_to_fn(&args->target, &len, NULL);
   3298 	if (nm == NULL) {
   3299 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3300 		goto final;
   3301 	}
   3302 
   3303 	if (len > MAXNAMELEN) {
   3304 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   3305 		kmem_free(nm, len);
   3306 		goto final;
   3307 	}
   3308 
   3309 	if (rdonly4(cs->exi, cs->vp, req)) {
   3310 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   3311 		kmem_free(nm, len);
   3312 		goto final;
   3313 	}
   3314 
   3315 	/*
   3316 	 * Lookup the file to determine type and while we are see if
   3317 	 * there is a file struct around and check for delegation.
   3318 	 * We don't need to acquire va_seq before this lookup, if
   3319 	 * it causes an update, cinfo.before will not match, which will
   3320 	 * trigger a cache flush even if atomic is TRUE.
   3321 	 */
   3322 	fp = mds_lookup_and_findfile(dvp, nm, &vp, &error, cs);
   3323 	if (vp != NULL) {
   3324 		if (rfs4_check_delegated(FWRITE, vp, TRUE, TRUE, TRUE, NULL)) {
   3325 			VN_RELE(vp);
   3326 			rfs4_file_rele(fp);
   3327 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   3328 			kmem_free(nm, len);
   3329 			goto final;
   3330 		}
   3331 	} else {	/* Didn't find anything to remove */
   3332 		*cs->statusp = resp->status = error;
   3333 		kmem_free(nm, len);
   3334 		goto final;
   3335 	}
   3336 
   3337 	if (nbl_need_check(vp)) {
   3338 		nbl_start_crit(vp, RW_READER);
   3339 		in_crit = 1;
   3340 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, &ct)) {
   3341 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   3342 			kmem_free(nm, len);
   3343 			nbl_end_crit(vp);
   3344 			VN_RELE(vp);
   3345 			if (fp) {
   3346 				rfs4_clear_dont_grant(cs->instp, fp);
   3347 				rfs4_file_rele(fp);
   3348 			}
   3349 			goto final;
   3350 		}
   3351 	}
   3352 
   3353 	/* check label before allowing removal */
   3354 	if (is_system_labeled()) {
   3355 		ASSERT(req->rq_label != NULL);
   3356 		clabel = req->rq_label;
   3357 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   3358 		    "got client label from request(1)",
   3359 		    struct svc_req *, req);
   3360 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3361 			if (!do_rfs_label_check(clabel, vp,
   3362 			    EQUALITY_CHECK, cs->exi)) {
   3363 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3364 				kmem_free(nm, len);
   3365 				if (in_crit)
   3366 					nbl_end_crit(vp);
   3367 				VN_RELE(vp);
   3368 				if (fp) {
   3369 					rfs4_clear_dont_grant(cs->instp, fp);
   3370 					rfs4_file_rele(fp);
   3371 				}
   3372 				goto final;
   3373 			}
   3374 		}
   3375 	}
   3376 
   3377 	ct.cc_sysid = 0;
   3378 	ct.cc_pid = 0;
   3379 	ct.cc_caller_id = cs->instp->caller_id;
   3380 	ct.cc_flags = CC_DONTBLOCK;
   3381 
   3382 	/* Get dir "before" change value */
   3383 	bdva.va_mask = AT_CTIME|AT_SEQ;
   3384 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct);
   3385 	if (error) {
   3386 		*cs->statusp = resp->status = puterrno4(error);
   3387 		kmem_free(nm, len);
   3388 		if (in_crit)
   3389 			nbl_end_crit(vp);
   3390 		VN_RELE(vp);
   3391 		if (fp) {
   3392 			rfs4_clear_dont_grant(cs->instp, fp);
   3393 			rfs4_file_rele(fp);
   3394 		}
   3395 		goto final;
   3396 	}
   3397 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   3398 
   3399 	/* Actually do the REMOVE operation */
   3400 	if (vp->v_type == VDIR) {
   3401 		/*
   3402 		 * Can't remove a directory that has a mounted-on filesystem.
   3403 		 */
   3404 		if (vn_ismntpt(vp)) {
   3405 			error = EACCES;
   3406 		} else {
   3407 			/*
   3408 			 * System V defines rmdir to return EEXIST,
   3409 			 * not * ENOTEMPTY, if the directory is not
   3410 			 * empty.  A System V NFS server needs to map
   3411 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
   3412 			 * transmit over the wire.
   3413 			 */
   3414 			if ((error = VOP_RMDIR(dvp, nm, rootdir,
   3415 			    cs->cr, &ct, 0)) == EEXIST)
   3416 				error = ENOTEMPTY;
   3417 		}
   3418 	} else {
   3419 		if ((error = VOP_REMOVE(dvp, nm, cs->cr, &ct, 0)) == 0 &&
   3420 		    fp != NULL) {
   3421 			struct vattr va;
   3422 			vnode_t *tvp;
   3423 
   3424 			rfs4_dbe_lock(fp->rf_dbe);
   3425 			tvp = fp->rf_vp;
   3426 			if (tvp)
   3427 				VN_HOLD(tvp);
   3428 			rfs4_dbe_unlock(fp->rf_dbe);
   3429 
   3430 			if (tvp) {
   3431 				/*
   3432 				 * This is va_seq safe because we are not
   3433 				 * manipulating dvp.
   3434 				 */
   3435 				va.va_mask = AT_NLINK;
   3436 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr,
   3437 				    &ct) && va.va_nlink == 0) {
   3438 					if (in_crit) {
   3439 						nbl_end_crit(vp);
   3440 						in_crit = 0;
   3441 					}
   3442 
   3443 					/* Remove the layout */
   3444 					mds_delete_layout(tvp);
   3445 
   3446 					/*
   3447 					 * Remove objects on data servers.
   3448 					 * Ignore errors for now..
   3449 					 */
   3450 					(void) do_ctl_mds_remove(tvp, fp, cs);
   3451 
   3452 					/* Remove state on file remove */
   3453 					rfs4_close_all_state(fp);
   3454 				}
   3455 				VN_RELE(tvp);
   3456 			}
   3457 		}
   3458 	}
   3459 
   3460 	if (in_crit)
   3461 		nbl_end_crit(vp);
   3462 	VN_RELE(vp);
   3463 
   3464 	if (fp) {
   3465 		rfs4_clear_dont_grant(cs->instp, fp);
   3466 		rfs4_file_rele(fp);
   3467 		fp = NULL;
   3468 	}
   3469 	kmem_free(nm, len);
   3470 
   3471 	if (error) {
   3472 		*cs->statusp = resp->status = puterrno4(error);
   3473 		goto final;
   3474 	}
   3475 
   3476 	/*
   3477 	 * Get the initial "after" sequence number, if it fails, set to zero
   3478 	 */
   3479 	idva.va_mask = AT_SEQ;
   3480 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct))
   3481 		idva.va_seq = 0;
   3482 
   3483 	/*
   3484 	 * Force modified data and metadata out to stable storage.
   3485 	 */
   3486 	(void) VOP_FSYNC(dvp, 0, cs->cr, &ct);
   3487 
   3488 	/*
   3489 	 * Get "after" change value, if it fails, simply return the
   3490 	 * before value.
   3491 	 */
   3492 	adva.va_mask = AT_CTIME|AT_SEQ;
   3493 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) {
   3494 		adva.va_ctime = bdva.va_ctime;
   3495 		adva.va_seq = 0;
   3496 	}
   3497 
   3498 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   3499 
   3500 	/*
   3501 	 * The cinfo.atomic = TRUE only if we have
   3502 	 * non-zero va_seq's, and it has incremented by exactly one
   3503 	 * during the VOP_REMOVE/RMDIR and it didn't change during
   3504 	 * the VOP_FSYNC.
   3505 	 */
   3506 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   3507 	    idva.va_seq == (bdva.va_seq + 1) &&
   3508 	    idva.va_seq == adva.va_seq)
   3509 		resp->cinfo.atomic = TRUE;
   3510 	else
   3511 		resp->cinfo.atomic = FALSE;
   3512 
   3513 	*cs->statusp = resp->status = NFS4_OK;
   3514 
   3515 final:
   3516 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
   3517 	    REMOVE4res *, resp);
   3518 }
   3519 
   3520 /*
   3521  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
   3522  *		oldname and newname.
   3523  *	res: status. If success - CURRENT_FH unchanged, return change_info
   3524  *		for both from and target directories.
   3525  */
   3526 /* ARGSUSED */
   3527 static void
   3528 mds_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3529 	compound_state_t *cs)
   3530 {
   3531 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
   3532 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
   3533 	int error;
   3534 	vnode_t *odvp;
   3535 	vnode_t *ndvp;
   3536 	vnode_t *srcvp, *targvp;
   3537 	struct vattr obdva, oidva, oadva;
   3538 	struct vattr nbdva, nidva, nadva;
   3539 	char *onm, *nnm;
   3540 	uint_t olen, nlen;
   3541 	rfs4_file_t *fp, *sfp;
   3542 	int in_crit_src, in_crit_targ;
   3543 	int fp_rele_grant_hold, sfp_rele_grant_hold;
   3544 	bslabel_t *clabel;
   3545 	caller_context_t ct;
   3546 
   3547 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
   3548 	    RENAME4args *, args);
   3549 
   3550 	fp = sfp = NULL;
   3551 	srcvp = targvp = NULL;
   3552 	in_crit_src = in_crit_targ = 0;
   3553 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
   3554 
   3555 	/* CURRENT_FH: target directory */
   3556 	ndvp = cs->vp;
   3557 	if (ndvp == NULL) {
   3558 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3559 		goto final;
   3560 	}
   3561 
   3562 	/* SAVED_FH: from directory */
   3563 	odvp = cs->saved_vp;
   3564 	if (odvp == NULL) {
   3565 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3566 		goto final;
   3567 	}
   3568 
   3569 	if (cs->access == CS_ACCESS_DENIED) {
   3570 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3571 		goto final;
   3572 	}
   3573 
   3574 	/*
   3575 	 * If there is an unshared filesystem mounted on this vnode,
   3576 	 * do not allow to rename objects in this directory.
   3577 	 */
   3578 	if (vn_ismntpt(odvp)) {
   3579 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3580 		goto final;
   3581 	}
   3582 
   3583 	/*
   3584 	 * If there is an unshared filesystem mounted on this vnode,
   3585 	 * do not allow to rename to this directory.
   3586 	 */
   3587 	if (vn_ismntpt(ndvp)) {
   3588 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3589 		goto final;
   3590 	}
   3591 
   3592 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
   3593 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   3594 		goto final;
   3595 	}
   3596 
   3597 	if (cs->saved_exi != cs->exi) {
   3598 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   3599 		goto final;
   3600 	}
   3601 
   3602 	if (!utf8_dir_verify(&args->oldname)) {
   3603 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3604 		goto final;
   3605 	}
   3606 
   3607 	if (!utf8_dir_verify(&args->newname)) {
   3608 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3609 		goto final;
   3610 	}
   3611 
   3612 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
   3613 	if (onm == NULL) {
   3614 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3615 		goto final;
   3616 	}
   3617 
   3618 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   3619 	if (nnm == NULL) {
   3620 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3621 		kmem_free(onm, olen);
   3622 		goto final;
   3623 	}
   3624 
   3625 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
   3626 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   3627 		kmem_free(onm, olen);
   3628 		kmem_free(nnm, nlen);
   3629 		goto final;
   3630 	}
   3631 
   3632 
   3633 	if (rdonly4(cs->exi, cs->vp, req)) {
   3634 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   3635 		kmem_free(onm,