Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/systm.h>
     34 #include <sys/cred.h>
     35 #include <sys/buf.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vfs_opreg.h>
     38 #include <sys/vnode.h>
     39 #include <sys/uio.h>
     40 #include <sys/errno.h>
     41 #include <sys/sysmacros.h>
     42 #include <sys/statvfs.h>
     43 #include <sys/kmem.h>
     44 #include <sys/dirent.h>
     45 #include <sys/cmn_err.h>
     46 #include <sys/debug.h>
     47 #include <sys/systeminfo.h>
     48 #include <sys/flock.h>
     49 #include <sys/pathname.h>
     50 #include <sys/nbmlock.h>
     51 #include <sys/share.h>
     52 #include <sys/atomic.h>
     53 #include <sys/policy.h>
     54 #include <sys/fem.h>
     55 #include <sys/sdt.h>
     56 #include <sys/ddi.h>
     57 #include <sys/zone.h>
     58 
     59 #include <rpc/types.h>
     60 #include <rpc/auth.h>
     61 #include <rpc/rpcsec_gss.h>
     62 #include <rpc/svc.h>
     63 
     64 #include <nfs/nfs.h>
     65 #include <nfs/export.h>
     66 #include <nfs/nfs_cmd.h>
     67 #include <nfs/lm.h>
     68 #include <nfs/nfs4.h>
     69 
     70 #include <sys/strsubr.h>
     71 #include <sys/strsun.h>
     72 
     73 #include <inet/common.h>
     74 #include <inet/ip.h>
     75 #include <inet/ip6.h>
     76 
     77 #include <sys/tsol/label.h>
     78 #include <sys/tsol/tndb.h>
     79 
     80 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
     81 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
     82 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
     83 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
     84 extern struct svc_ops rdma_svc_ops;
     85 /* End of Tunables */
     86 
     87 static int rdma_setup_read_data4(READ4args *, READ4res *);
     88 
     /*
      * Advance the change sequence (bits.chgseq) of the stateid pointed to
      * by sp, so that the stateid visibly changes.  Evaluates to the new
      * sequence value.
      */
     #define	next_stateid(sp) ((sp)->bits.chgseq += 1)
     93 
     94 /*
     95  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
     96  *	This is used to return NFS4ERR_TOOSMALL when clients specify
     97  *	maxcount that isn't large enough to hold the smallest possible
     98  *	XDR encoded dirent.
     99  *
    100  *	    sizeof cookie (8 bytes) +
    101  *	    sizeof name_len (4 bytes) +
    102  *	    sizeof smallest (padded) name (4 bytes) +
    103  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
    104  *	    sizeof attrlist4_len (4 bytes) +
    105  *	    sizeof next boolean (4 bytes)
    106  *
    107  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
    108  * the smallest possible entry4 (assumes no attrs requested).
    109  *	sizeof nfsstat4 (4 bytes) +
    110  *	sizeof verifier4 (8 bytes) +
    111  *	sizeof entry4list bool (4 bytes) +
    112  *	sizeof entry4 	(36 bytes) +
    113  *	sizeof eof bool  (4 bytes)
    114  *
    115  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
    116  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
    117  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
    118  *	required for a given name length.  MAXNAMELEN is the maximum
    119  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
    120  *	macros are to allow for . and .. entries -- just a minor tweak to try
    121  *	and guarantee that buffer we give to VOP_READDIR will be large enough
    122  *	to hold ., .., and the largest possible solaris dirent64.
    123  */
    /*
     * ENTRY4: cookie(8) + name_len(4) + padded name(4) + bitmap4(12) +
     *	attrlist4_len(4) + next boolean(4) = 36 bytes.
     * RDDIR4: one minimal entry4 plus the verifier and three 4-byte words
     *	(nfsstat4, entry4list bool, eof bool).
     * RDDIR_BUF: room for ".", ".." and one maximum-length dirent64.
     */
    #define	RFS4_MINLEN_ENTRY4 (8 + 4 + 4 + 12 + 4 + 4)
    #define	RFS4_MINLEN_RDDIR4 (NFS4_VERIFIER_SIZE + RFS4_MINLEN_ENTRY4 + 3 * 4)
    #define	RFS4_MINLEN_RDDIR_BUF \
    	(DIRENT64_RECLEN(MAXNAMELEN) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(1))
    128 
    129 /*
    130  * It would be better to pad to 4 bytes since that's what XDR would do,
    131  * but the dirents UFS gives us are already padded to 8, so just take
    132  * what we're given.  Dircount is only a hint anyway.  Currently the
    133  * solaris kernel is ASCII only, so there's no point in calling the
    134  * UTF8 functions.
    135  *
    136  * dirent64: name padded to provide 8 byte struct alignment
    137  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
    138  *
    139  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
    140  *
    141  */
    /*
     * Dircount contribution of one dirent64: the name length derived from
     * d_reclen plus three XDR units (8-byte cookie + 4-byte name length).
     */
    #define	DIRENT64_TO_DIRCOUNT(dp) \
    	(DIRENT64_NAMELEN((dp)->d_reclen) + 3 * BYTES_PER_XDR_UNIT)
    144 
    145 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
    146 
    147 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
    148 
    149 u_longlong_t	nfs4_srv_caller_id;
    150 uint_t		nfs4_srv_vkey = 0;
    151 
    152 verifier4	Write4verf;
    153 verifier4	Readdir4verf;
    154 
    155 void	rfs4_init_compound_state(struct compound_state *);
    156 
    157 static void	nullfree(caddr_t);
    158 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    159 			struct compound_state *);
    160 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    161 			struct compound_state *);
    162 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    163 			struct compound_state *);
    164 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    165 			struct compound_state *);
    166 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    167 			struct compound_state *);
    168 static void	rfs4_op_create_free(nfs_resop4 *resop);
    169 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    170 			struct svc_req *, struct compound_state *);
    171 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
    172 			struct svc_req *, struct compound_state *);
    173 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    174 			struct compound_state *);
    175 static void	rfs4_op_getattr_free(nfs_resop4 *);
    176 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    177 			struct compound_state *);
    178 static void	rfs4_op_getfh_free(nfs_resop4 *);
    179 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    180 			struct compound_state *);
    181 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    182 			struct compound_state *);
    183 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    184 			struct compound_state *);
    185 static void	lock_denied_free(nfs_resop4 *);
    186 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    187 			struct compound_state *);
    188 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    189 			struct compound_state *);
    190 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    191 			struct compound_state *);
    192 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    193 			struct compound_state *);
    194 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    195 				struct svc_req *req, struct compound_state *cs);
    196 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    197 			struct compound_state *);
    198 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    199 			struct compound_state *);
    200 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
    201 			struct svc_req *, struct compound_state *);
    202 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    203 			struct svc_req *, struct compound_state *);
    204 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    205 			struct compound_state *);
    206 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    207 			struct compound_state *);
    208 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    209 			struct compound_state *);
    210 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    211 			struct compound_state *);
    212 static void	rfs4_op_read_free(nfs_resop4 *);
    213 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
    214 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    215 			struct compound_state *);
    216 static void	rfs4_op_readlink_free(nfs_resop4 *);
    217 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    218 			struct svc_req *, struct compound_state *);
    219 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    220 			struct compound_state *);
    221 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    222 			struct compound_state *);
    223 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    224 			struct compound_state *);
    225 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    226 			struct compound_state *);
    227 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    228 			struct compound_state *);
    229 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    230 			struct compound_state *);
    231 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    232 			struct compound_state *);
    233 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    234 			struct compound_state *);
    235 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
    236 			struct svc_req *, struct compound_state *);
    237 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
    238 			struct svc_req *req, struct compound_state *);
    239 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    240 			struct compound_state *);
    241 static void	rfs4_op_secinfo_free(nfs_resop4 *);
    242 
    243 static nfsstat4 check_open_access(uint32_t,
    244 				struct compound_state *, struct svc_req *);
    245 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
    246 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
    247 
    248 /*
    249  * translation table for attrs
    250  */
    /*
     * Working state for attribute translation; set up and torn down by
     * nfs4_ntov_table_init() / nfs4_ntov_table_free().
     * NOTE(review): the per-field notes below are inferred from the names
     * only; confirm against the functions that fill the table in.
     */
    251 struct nfs4_ntov_table {
    252 	union nfs4_attr_u *na;		/* presumably attribute values */
    253 	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably per-attr index map */
    254 	int attrcnt;			/* presumably count of entries in use */
    255 	bool_t vfsstat;			/* presumably "vfs stats needed" flag */
    256 };
    257 
    258 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
    259 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
    260 				    struct nfs4_svgetit_arg *sargp);
    261 
    262 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
    263 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
    264 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
    265 
    266 fem_t		*deleg_rdops;
    267 fem_t		*deleg_wrops;
    268 
    269 rfs4_servinst_t *rfs4_cur_servinst = NULL;	/* current server instance */
    270 kmutex_t	rfs4_servinst_lock;	/* protects linked list */
    271 int		rfs4_seen_first_compound;	/* set first time we see one */
    272 
    273 /*
    274  * NFS4 op dispatch table
    275  */
    276 
    /*
     * One slot of the NFSv4 operation dispatch table (rfsv4disptab).
     * Both function pointers are declared without a prototype, so the
     * compiler does not check the arguments passed through them.
     */
    277 struct rfsv4disp {
    278 	void	(*dis_proc)();		/* operation handler to call */
    279 	void	(*dis_resfree)();	/* frees space allocated by dis_proc */
    280 	int	dis_flags;		/* 0 or RPC_IDEMPOTENT, RPC_PUBLICFH_OK, RPC_ALL */
    281 };
    282 
    283 static struct rfsv4disp rfsv4disptab[] = {
    284 	/*
    285 	 * NFS VERSION 4
    286 	 */
    287 
    288 	/* RFS_NULL = 0 */
    289 	{rfs4_op_illegal, nullfree, 0},
    290 
    291 	/* UNUSED = 1 */
    292 	{rfs4_op_illegal, nullfree, 0},
    293 
    294 	/* UNUSED = 2 */
    295 	{rfs4_op_illegal, nullfree, 0},
    296 
    297 	/* OP_ACCESS = 3 */
    298 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
    299 
    300 	/* OP_CLOSE = 4 */
    301 	{rfs4_op_close, nullfree, 0},
    302 
    303 	/* OP_COMMIT = 5 */
    304 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
    305 
    306 	/* OP_CREATE = 6 */
    307 	{rfs4_op_create, nullfree, 0},
    308 
    309 	/* OP_DELEGPURGE = 7 */
    310 	{rfs4_op_delegpurge, nullfree, 0},
    311 
    312 	/* OP_DELEGRETURN = 8 */
    313 	{rfs4_op_delegreturn, nullfree, 0},
    314 
    315 	/* OP_GETATTR = 9 */
    316 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
    317 
    318 	/* OP_GETFH = 10 */
    319 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
    320 
    321 	/* OP_LINK = 11 */
    322 	{rfs4_op_link, nullfree, 0},
    323 
    324 	/* OP_LOCK = 12 */
    325 	{rfs4_op_lock, lock_denied_free, 0},
    326 
    327 	/* OP_LOCKT = 13 */
    328 	{rfs4_op_lockt, lock_denied_free, 0},
    329 
    330 	/* OP_LOCKU = 14 */
    331 	{rfs4_op_locku, nullfree, 0},
    332 
    333 	/* OP_LOOKUP = 15 */
    334 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
    335 
    336 	/* OP_LOOKUPP = 16 */
    337 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
    338 
    339 	/* OP_NVERIFY = 17 */
    340 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
    341 
    342 	/* OP_OPEN = 18 */
    343 	{rfs4_op_open, rfs4_free_reply, 0},
    344 
    345 	/* OP_OPENATTR = 19 */
    346 	{rfs4_op_openattr, nullfree, 0},
    347 
    348 	/* OP_OPEN_CONFIRM = 20 */
    349 	{rfs4_op_open_confirm, nullfree, 0},
    350 
    351 	/* OP_OPEN_DOWNGRADE = 21 */
    352 	{rfs4_op_open_downgrade, nullfree, 0},
    353 
    354 	/* OP_OPEN_PUTFH = 22 */
    355 	{rfs4_op_putfh, nullfree, RPC_ALL},
    356 
    357 	/* OP_PUTPUBFH = 23 */
    358 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
    359 
    360 	/* OP_PUTROOTFH = 24 */
    361 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
    362 
    363 	/* OP_READ = 25 */
    364 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
    365 
    366 	/* OP_READDIR = 26 */
    367 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
    368 
    369 	/* OP_READLINK = 27 */
    370 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
    371 
    372 	/* OP_REMOVE = 28 */
    373 	{rfs4_op_remove, nullfree, 0},
    374 
    375 	/* OP_RENAME = 29 */
    376 	{rfs4_op_rename, nullfree, 0},
    377 
    378 	/* OP_RENEW = 30 */
    379 	{rfs4_op_renew, nullfree, 0},
    380 
    381 	/* OP_RESTOREFH = 31 */
    382 	{rfs4_op_restorefh, nullfree, RPC_ALL},
    383 
    384 	/* OP_SAVEFH = 32 */
    385 	{rfs4_op_savefh, nullfree, RPC_ALL},
    386 
    387 	/* OP_SECINFO = 33 */
    388 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
    389 
    390 	/* OP_SETATTR = 34 */
    391 	{rfs4_op_setattr, nullfree, 0},
    392 
    393 	/* OP_SETCLIENTID = 35 */
    394 	{rfs4_op_setclientid, nullfree, 0},
    395 
    396 	/* OP_SETCLIENTID_CONFIRM = 36 */
    397 	{rfs4_op_setclientid_confirm, nullfree, 0},
    398 
    399 	/* OP_VERIFY = 37 */
    400 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
    401 
    402 	/* OP_WRITE = 38 */
    403 	{rfs4_op_write, nullfree, 0},
    404 
    405 	/* OP_RELEASE_LOCKOWNER = 39 */
    406 	{rfs4_op_release_lockowner, nullfree, 0},
    407 };
    408 
    409 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
    410 
    411 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
    412 
    413 #ifdef DEBUG
    414 
    /*
     * DEBUG-only switches.  Their consumers are outside this part of the
     * file; only the defaults are established here.
     */
    int		rfs4_fillone_debug = 0;		/* off by default */
    int		rfs4_no_stub_access = 1;	/* on by default */
    int		rfs4_rddir_debug = 0;		/* off by default */
    418 
    /*
     * Printable handler name for each operation number, in the same order
     * as the dispatch table, followed by one extra entry for the illegal
     * operation.  Slot 36 previously read "rfs4_op_setclient_confirm";
     * the handler is rfs4_op_setclientid_confirm.
     */
    static char    *rfs4_op_string[] = {
    	"rfs4_op_null",
    	"rfs4_op_1 unused",
    	"rfs4_op_2 unused",
    	"rfs4_op_access",
    	"rfs4_op_close",
    	"rfs4_op_commit",
    	"rfs4_op_create",
    	"rfs4_op_delegpurge",
    	"rfs4_op_delegreturn",
    	"rfs4_op_getattr",
    	"rfs4_op_getfh",
    	"rfs4_op_link",
    	"rfs4_op_lock",
    	"rfs4_op_lockt",
    	"rfs4_op_locku",
    	"rfs4_op_lookup",
    	"rfs4_op_lookupp",
    	"rfs4_op_nverify",
    	"rfs4_op_open",
    	"rfs4_op_openattr",
    	"rfs4_op_open_confirm",
    	"rfs4_op_open_downgrade",
    	"rfs4_op_putfh",
    	"rfs4_op_putpubfh",
    	"rfs4_op_putrootfh",
    	"rfs4_op_read",
    	"rfs4_op_readdir",
    	"rfs4_op_readlink",
    	"rfs4_op_remove",
    	"rfs4_op_rename",
    	"rfs4_op_renew",
    	"rfs4_op_restorefh",
    	"rfs4_op_savefh",
    	"rfs4_op_secinfo",
    	"rfs4_op_setattr",
    	"rfs4_op_setclientid",
    	"rfs4_op_setclientid_confirm",
    	"rfs4_op_verify",
    	"rfs4_op_write",
    	"rfs4_op_release_lockowner",
    	"rfs4_op_illegal"
    };
    462 #endif
    463 
    464 void	rfs4_ss_chkclid(rfs4_client_t *);
    465 
    466 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
    467 
    /*
     * Step from one dirent64 to the record that follows it, d_reclen
     * bytes further on.  Any earlier definition of nextdp is discarded.
     */
    #undef	nextdp
    #define	nextdp(dp)	((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
    472 
    /*
     * Operation template for read delegations: pairs each listed vnode
     * operation name with its deleg_rd_* handler.  The list ends with a
     * NULL/NULL pair.  rfs4_srvrinit() passes it to fem_create() to build
     * deleg_rdops.
     */
    473 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
    474 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
    475 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
    476 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
    477 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
    478 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
    479 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
    480 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
    481 	NULL,			NULL
    482 };
    /*
     * Operation template for write delegations; same layout as the read
     * template, with an additional VOPNAME_READ entry.  rfs4_srvrinit()
     * passes it to fem_create() to build deleg_wrops.
     */
    483 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
    484 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
    485 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
    486 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
    487 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
    488 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
    489 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
    490 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
    491 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
    492 	NULL,			NULL
    493 };
    494 
    495 int
    496 rfs4_srvrinit(void)
    497 {
    498 	timespec32_t verf;
    499 	int error;
    500 	extern void rfs4_attr_init();
    501 	extern krwlock_t rfs4_deleg_policy_lock;
    502 
    503 	/*
    504 	 * The following algorithm attempts to find a unique verifier
    505 	 * to be used as the write verifier returned from the server
    506 	 * to the client.  It is important that this verifier change
    507 	 * whenever the server reboots.  Of secondary importance, it
    508 	 * is important for the verifier to be unique between two
    509 	 * different servers.
    510 	 *
    511 	 * Thus, an attempt is made to use the system hostid and the
    512 	 * current time in seconds when the nfssrv kernel module is
    513 	 * loaded.  It is assumed that an NFS server will not be able
    514 	 * to boot and then to reboot in less than a second.  If the
    515 	 * hostid has not been set, then the current high resolution
    516 	 * time is used.  This will ensure different verifiers each
    517 	 * time the server reboots and minimize the chances that two
    518 	 * different servers will have the same verifier.
    519 	 * XXX - this is broken on LP64 kernels.
    520 	 */
    521 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
    522 	if (verf.tv_sec != 0) {
    523 		verf.tv_nsec = gethrestime_sec();
    524 	} else {
    525 		timespec_t tverf;
    526 
    527 		gethrestime(&tverf);
    528 		verf.tv_sec = (time_t)tverf.tv_sec;
    529 		verf.tv_nsec = tverf.tv_nsec;
    530 	}
    531 
    532 	Write4verf = *(uint64_t *)&verf;
    533 
    534 	rfs4_attr_init();
    535 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
    536 
    537 	/* Used to manage create/destroy of server state */
    538 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
    539 
    540 	/* Used to manage access to server instance linked list */
    541 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
    542 
    543 	/* Used to manage access to rfs4_deleg_policy */
    544 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
    545 
    546 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
    547 	if (error != 0) {
    548 		rfs4_disable_delegation();
    549 	} else {
    550 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
    551 		    &deleg_wrops);
    552 		if (error != 0) {
    553 			rfs4_disable_delegation();
    554 			fem_free(deleg_rdops);
    555 		}
    556 	}
    557 
    558 	nfs4_srv_caller_id = fs_new_caller_id();
    559 
    560 	lockt_sysid = lm_alloc_sysidt();
    561 
    562 	vsd_create(&nfs4_srv_vkey, NULL);
    563 
    564 	return (0);
    565 }
    566 
    567 void
    568 rfs4_srvrfini(void)
    569 {
    570 	extern krwlock_t rfs4_deleg_policy_lock;
    571 
    572 	if (lockt_sysid != LM_NOSYSID) {
    573 		lm_free_sysidt(lockt_sysid);
    574 		lockt_sysid = LM_NOSYSID;
    575 	}
    576 
    577 	mutex_destroy(&rfs4_deleg_lock);
    578 	mutex_destroy(&rfs4_state_lock);
    579 	rw_destroy(&rfs4_deleg_policy_lock);
    580 
    581 	fem_free(deleg_rdops);
    582 	fem_free(deleg_wrops);
    583 }
    584 
    585 void
    586 rfs4_init_compound_state(struct compound_state *cs)
    587 {
    588 	bzero(cs, sizeof (*cs));
    589 	cs->cont = TRUE;
    590 	cs->access = CS_ACCESS_DENIED;
    591 	cs->deleg = FALSE;
    592 	cs->mandlock = FALSE;
    593 	cs->fh.nfs_fh4_val = cs->fhbuf;
    594 }
    595 
    596 void
    597 rfs4_grace_start(rfs4_servinst_t *sip)
    598 {
    599 	rw_enter(&sip->rwlock, RW_WRITER);
    600 	sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
    601 	sip->grace_period = rfs4_grace_period;
    602 	rw_exit(&sip->rwlock);
    603 }
    604 
    605 /*
    606  * returns true if the instance's grace period has never been started
    607  */
    608 int
    609 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
    610 {
    611 	time_t start_time;
    612 
    613 	rw_enter(&sip->rwlock, RW_READER);
    614 	start_time = sip->start_time;
    615 	rw_exit(&sip->rwlock);
    616 
    617 	return (start_time == 0);
    618 }
    619 
    620 /*
    621  * Indicates if server instance is within the
    622  * grace period.
    623  */
    624 int
    625 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
    626 {
    627 	time_t grace_expiry;
    628 
    629 	rw_enter(&sip->rwlock, RW_READER);
    630 	grace_expiry = sip->start_time + sip->grace_period;
    631 	rw_exit(&sip->rwlock);
    632 
    633 	return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
    634 }
    635 
    636 int
    637 rfs4_clnt_in_grace(rfs4_client_t *cp)
    638 {
    639 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    640 
    641 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
    642 }
    643 
    644 /*
    645  * reset all currently active grace periods
    646  */
    647 void
    648 rfs4_grace_reset_all(void)
    649 {
    650 	rfs4_servinst_t *sip;
    651 
    652 	mutex_enter(&rfs4_servinst_lock);
    653 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
    654 		if (rfs4_servinst_in_grace(sip))
    655 			rfs4_grace_start(sip);
    656 	mutex_exit(&rfs4_servinst_lock);
    657 }
    658 
    659 /*
    660  * start any new instances' grace periods
    661  */
    662 void
    663 rfs4_grace_start_new(void)
    664 {
    665 	rfs4_servinst_t *sip;
    666 
    667 	mutex_enter(&rfs4_servinst_lock);
    668 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
    669 		if (rfs4_servinst_grace_new(sip))
    670 			rfs4_grace_start(sip);
    671 	mutex_exit(&rfs4_servinst_lock);
    672 }
    673 
    674 static rfs4_dss_path_t *
    675 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
    676 {
    677 	size_t len;
    678 	rfs4_dss_path_t *dss_path;
    679 
    680 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
    681 
    682 	/*
    683 	 * Take a copy of the string, since the original may be overwritten.
    684 	 * Sadly, no strdup() in the kernel.
    685 	 */
    686 	/* allow for NUL */
    687 	len = strlen(path) + 1;
    688 	dss_path->path = kmem_alloc(len, KM_SLEEP);
    689 	(void) strlcpy(dss_path->path, path, len);
    690 
    691 	/* associate with servinst */
    692 	dss_path->sip = sip;
    693 	dss_path->index = index;
    694 
    695 	/*
    696 	 * Add to list of served paths.
    697 	 * No locking required, as we're only ever called at startup.
    698 	 */
    699 	if (rfs4_dss_pathlist == NULL) {
    700 		/* this is the first dss_path_t */
    701 
    702 		/* needed for insque/remque */
    703 		dss_path->next = dss_path->prev = dss_path;
    704 
    705 		rfs4_dss_pathlist = dss_path;
    706 	} else {
    707 		insque(dss_path, rfs4_dss_pathlist);
    708 	}
    709 
    710 	return (dss_path);
    711 }
    712 
    713 /*
    714  * Create a new server instance, and make it the currently active instance.
    715  * Note that starting the grace period too early will reduce the clients'
    716  * recovery window.
    717  */
    718 void
    719 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
    720 {
    721 	unsigned i;
    722 	rfs4_servinst_t *sip;
    723 	rfs4_oldstate_t *oldstate;
    724 
    725 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
    726 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
    727 
    728 	sip->start_time = (time_t)0;
    729 	sip->grace_period = (time_t)0;
    730 	sip->next = NULL;
    731 	sip->prev = NULL;
    732 
    733 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
    734 	/*
    735 	 * This initial dummy entry is required to setup for insque/remque.
    736 	 * It must be skipped over whenever the list is traversed.
    737 	 */
    738 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
    739 	/* insque/remque require initial list entry to be self-terminated */
    740 	oldstate->next = oldstate;
    741 	oldstate->prev = oldstate;
    742 	sip->oldstate = oldstate;
    743 
    744 
    745 	sip->dss_npaths = dss_npaths;
    746 	sip->dss_paths = kmem_alloc(dss_npaths *
    747 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
    748 
    749 	for (i = 0; i < dss_npaths; i++) {
    750 		sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
    751 	}
    752 
    753 	mutex_enter(&rfs4_servinst_lock);
    754 	if (rfs4_cur_servinst != NULL) {
    755 		/* add to linked list */
    756 		sip->prev = rfs4_cur_servinst;
    757 		rfs4_cur_servinst->next = sip;
    758 	}
    759 	if (start_grace)
    760 		rfs4_grace_start(sip);
    761 	/* make the new instance "current" */
    762 	rfs4_cur_servinst = sip;
    763 
    764 	mutex_exit(&rfs4_servinst_lock);
    765 }
    766 
    767 /*
    768  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
    769  * all instances directly.
    770  */
    771 void
    772 rfs4_servinst_destroy_all(void)
    773 {
    774 	rfs4_servinst_t *sip, *prev, *current;
    775 #ifdef DEBUG
    776 	int n = 0;
    777 #endif
    778 
    779 	mutex_enter(&rfs4_servinst_lock);
    780 	ASSERT(rfs4_cur_servinst != NULL);
    781 	current = rfs4_cur_servinst;
    782 	rfs4_cur_servinst = NULL;
    783 	for (sip = current; sip != NULL; sip = prev) {
    784 		prev = sip->prev;
    785 		rw_destroy(&sip->rwlock);
    786 		if (sip->oldstate)
    787 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
    788 		if (sip->dss_paths)
    789 			kmem_free(sip->dss_paths,
    790 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
    791 		kmem_free(sip, sizeof (rfs4_servinst_t));
    792 #ifdef DEBUG
    793 		n++;
    794 #endif
    795 	}
    796 	mutex_exit(&rfs4_servinst_lock);
    797 }
    798 
    799 /*
    800  * Assign the current server instance to a client_t.
    801  * Should be called with cp->rc_dbe held.
    802  */
    803 void
    804 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
    805 {
    806 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    807 
    808 	/*
    809 	 * The lock ensures that if the current instance is in the process
    810 	 * of changing, we will see the new one.
    811 	 */
    812 	mutex_enter(&rfs4_servinst_lock);
    813 	cp->rc_server_instance = sip;
    814 	mutex_exit(&rfs4_servinst_lock);
    815 }
    816 
    817 rfs4_servinst_t *
    818 rfs4_servinst(rfs4_client_t *cp)
    819 {
    820 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    821 
    822 	return (cp->rc_server_instance);
    823 }
    824 
    825 /* ARGSUSED */
    826 static void
    827 nullfree(caddr_t resop)
    828 {
    829 }
    830 
    831 /*
    832  * This is a fall-through for invalid or not implemented (yet) ops
    833  */
    834 /* ARGSUSED */
    835 static void
    836 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    837 	struct compound_state *cs)
    838 {
    839 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
    840 }
    841 
    842 /*
    843  * Check if the security flavor, nfsnum, is in the flavor_list.
    844  */
    845 bool_t
    846 in_flavor_list(int nfsnum, int *flavor_list, int count)
    847 {
    848 	int i;
    849 
    850 	for (i = 0; i < count; i++) {
    851 		if (nfsnum == flavor_list[i])
    852 			return (TRUE);
    853 	}
    854 	return (FALSE);
    855 }
    856 
    857 /*
    858  * Used by rfs4_op_secinfo to get the security information from the
    859  * export structure associated with the component.
    860  */
    861 /* ARGSUSED */
    862 static nfsstat4
    863 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
    864 {
    865 	int error, different_export = 0;
    866 	vnode_t *dvp, *vp, *tvp;
    867 	struct exportinfo *exi = NULL;
    868 	fid_t fid;
    869 	uint_t count, i;
    870 	secinfo4 *resok_val;
    871 	struct secinfo *secp;
    872 	seconfig_t *si;
    873 	bool_t did_traverse;
    874 	int dotdot, walk;
    875 
    876 	dvp = cs->vp;
    877 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
    878 
    879 	/*
    880 	 * If dotdotting, then need to check whether it's above the
    881 	 * root of a filesystem, or above an export point.
    882 	 */
    883 	if (dotdot) {
    884 
    885 		/*
    886 		 * If dotdotting at the root of a filesystem, then
    887 		 * need to traverse back to the mounted-on filesystem
    888 		 * and do the dotdot lookup there.
    889 		 */
    890 		if (cs->vp->v_flag & VROOT) {
    891 
    892 			/*
    893 			 * If at the system root, then can
    894 			 * go up no further.
    895 			 */
    896 			if (VN_CMP(dvp, rootdir))
    897 				return (puterrno4(ENOENT));
    898 
    899 			/*
    900 			 * Traverse back to the mounted-on filesystem
    901 			 */
    902 			dvp = untraverse(cs->vp);
    903 
    904 			/*
    905 			 * Set the different_export flag so we remember
    906 			 * to pick up a new exportinfo entry for
    907 			 * this new filesystem.
    908 			 */
    909 			different_export = 1;
    910 		} else {
    911 
    912 			/*
    913 			 * If dotdotting above an export point then set
    914 			 * the different_export to get new export info.
    915 			 */
    916 			different_export = nfs_exported(cs->exi, cs->vp);
    917 		}
    918 	}
    919 
    920 	/*
    921 	 * Get the vnode for the component "nm".
    922 	 */
    923 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
    924 	    NULL, NULL, NULL);
    925 	if (error)
    926 		return (puterrno4(error));
    927 
    928 	/*
    929 	 * If the vnode is in a pseudo filesystem, or if the security flavor
    930 	 * used in the request is valid but not an explicitly shared flavor,
    931 	 * or the access bit indicates that this is a limited access,
    932 	 * check whether this vnode is visible.
    933 	 */
    934 	if (!different_export &&
    935 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
    936 	    cs->access & CS_ACCESS_LIMITED)) {
    937 		if (! nfs_visible(cs->exi, vp, &different_export)) {
    938 			VN_RELE(vp);
    939 			return (puterrno4(ENOENT));
    940 		}
    941 	}
    942 
    943 	/*
    944 	 * If it's a mountpoint, then traverse it.
    945 	 */
    946 	if (vn_ismntpt(vp)) {
    947 		tvp = vp;
    948 		if ((error = traverse(&tvp)) != 0) {
    949 			VN_RELE(vp);
    950 			return (puterrno4(error));
    951 		}
    952 		/* remember that we had to traverse mountpoint */
    953 		did_traverse = TRUE;
    954 		vp = tvp;
    955 		different_export = 1;
    956 	} else if (vp->v_vfsp != dvp->v_vfsp) {
    957 		/*
    958 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
    959 		 * then vp is probably an LOFS object.  We don't need the
    960 		 * realvp, we just need to know that we might have crossed
    961 		 * a server fs boundary and need to call checkexport4.
    962 		 * (LOFS lookup hides server fs mountpoints, and actually calls
    963 		 * traverse)
    964 		 */
    965 		different_export = 1;
    966 		did_traverse = FALSE;
    967 	}
    968 
    969 	/*
    970 	 * Get the export information for it.
    971 	 */
    972 	if (different_export) {
    973 
    974 		bzero(&fid, sizeof (fid));
    975 		fid.fid_len = MAXFIDSZ;
    976 		error = vop_fid_pseudo(vp, &fid);
    977 		if (error) {
    978 			VN_RELE(vp);
    979 			return (puterrno4(error));
    980 		}
    981 
    982 		if (dotdot)
    983 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
    984 		else
    985 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
    986 
    987 		if (exi == NULL) {
    988 			if (did_traverse == TRUE) {
    989 				/*
    990 				 * If this vnode is a mounted-on vnode,
    991 				 * but the mounted-on file system is not
    992 				 * exported, send back the secinfo for
    993 				 * the exported node that the mounted-on
    994 				 * vnode lives in.
    995 				 */
    996 				exi = cs->exi;
    997 			} else {
    998 				VN_RELE(vp);
    999 				return (puterrno4(EACCES));
   1000 			}
   1001 		}
   1002 	} else {
   1003 		exi = cs->exi;
   1004 	}
   1005 	ASSERT(exi != NULL);
   1006 
   1007 
   1008 	/*
   1009 	 * Create the secinfo result based on the security information
   1010 	 * from the exportinfo structure (exi).
   1011 	 *
   1012 	 * Return all flavors for a pseudo node.
   1013 	 * For a real export node, return the flavor that the client
   1014 	 * has access with.
   1015 	 */
   1016 	ASSERT(RW_LOCK_HELD(&exported_lock));
   1017 	if (PSEUDO(exi)) {
   1018 		count = exi->exi_export.ex_seccnt; /* total sec count */
   1019 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
   1020 		secp = exi->exi_export.ex_secinfo;
   1021 
   1022 		for (i = 0; i < count; i++) {
   1023 			si = &secp[i].s_secinfo;
   1024 			resok_val[i].flavor = si->sc_rpcnum;
   1025 			if (resok_val[i].flavor == RPCSEC_GSS) {
   1026 				rpcsec_gss_info *info;
   1027 
   1028 				info = &resok_val[i].flavor_info;
   1029 				info->qop = si->sc_qop;
   1030 				info->service = (rpc_gss_svc_t)si->sc_service;
   1031 
   1032 				/* get oid opaque data */
   1033 				info->oid.sec_oid4_len =
   1034 				    si->sc_gss_mech_type->length;
   1035 				info->oid.sec_oid4_val = kmem_alloc(
   1036 				    si->sc_gss_mech_type->length, KM_SLEEP);
   1037 				bcopy(
   1038 				    si->sc_gss_mech_type->elements,
   1039 				    info->oid.sec_oid4_val,
   1040 				    info->oid.sec_oid4_len);
   1041 			}
   1042 		}
   1043 		resp->SECINFO4resok_len = count;
   1044 		resp->SECINFO4resok_val = resok_val;
   1045 	} else {
   1046 		int ret_cnt = 0, k = 0;
   1047 		int *flavor_list;
   1048 
   1049 		count = exi->exi_export.ex_seccnt; /* total sec count */
   1050 		secp = exi->exi_export.ex_secinfo;
   1051 
   1052 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
   1053 		/* find out which flavors to return */
   1054 		for (i = 0; i < count; i ++) {
   1055 			int access, flavor, perm;
   1056 
   1057 			flavor = secp[i].s_secinfo.sc_nfsnum;
   1058 			perm = secp[i].s_flags;
   1059 
   1060 			access = nfsauth4_secinfo_access(exi, cs->req,
   1061 			    flavor, perm);
   1062 
   1063 			if (! (access & NFSAUTH_DENIED) &&
   1064 			    ! (access & NFSAUTH_WRONGSEC)) {
   1065 				flavor_list[ret_cnt] = flavor;
   1066 				ret_cnt++;
   1067 			}
   1068 		}
   1069 
   1070 		/* Create the returning SECINFO value */
   1071 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
   1072 
   1073 		for (i = 0; i < count; i++) {
   1074 			/*
   1075 			 * If the flavor is in the flavor list,
   1076 			 * fill in resok_val.
   1077 			 */
   1078 			si = &secp[i].s_secinfo;
   1079 			if (in_flavor_list(si->sc_nfsnum,
   1080 			    flavor_list, ret_cnt)) {
   1081 				resok_val[k].flavor = si->sc_rpcnum;
   1082 				if (resok_val[k].flavor == RPCSEC_GSS) {
   1083 					rpcsec_gss_info *info;
   1084 
   1085 					info = &resok_val[k].flavor_info;
   1086 					info->qop = si->sc_qop;
   1087 					info->service = (rpc_gss_svc_t)
   1088 					    si->sc_service;
   1089 
   1090 					/* get oid opaque data */
   1091 					info->oid.sec_oid4_len =
   1092 					    si->sc_gss_mech_type->length;
   1093 					info->oid.sec_oid4_val = kmem_alloc(
   1094 					    si->sc_gss_mech_type->length,
   1095 					    KM_SLEEP);
   1096 					bcopy(si->sc_gss_mech_type->elements,
   1097 					    info->oid.sec_oid4_val,
   1098 					    info->oid.sec_oid4_len);
   1099 				}
   1100 				k++;
   1101 			}
   1102 			if (k >= ret_cnt)
   1103 				break;
   1104 		}
   1105 		resp->SECINFO4resok_len = ret_cnt;
   1106 		resp->SECINFO4resok_val = resok_val;
   1107 		kmem_free(flavor_list, count * sizeof (int));
   1108 	}
   1109 
   1110 	VN_RELE(vp);
   1111 	return (NFS4_OK);
   1112 }
   1113 
   1114 /*
   1115  * SECINFO (Operation 33): Obtain required security information on
   1116  * the component name in the format of (security-mechanism-oid, qop, service)
   1117  * triplets.
   1118  */
   1119 /* ARGSUSED */
   1120 static void
   1121 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1122     struct compound_state *cs)
   1123 {
   1124 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
   1125 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
   1126 	utf8string *utfnm = &args->name;
   1127 	uint_t len;
   1128 	char *nm;
   1129 	struct sockaddr *ca;
   1130 	char *name = NULL;
   1131 
   1132 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
   1133 	    SECINFO4args *, args);
   1134 
   1135 	/*
   1136 	 * Current file handle (cfh) should have been set before getting
   1137 	 * into this function. If not, return error.
   1138 	 */
   1139 	if (cs->vp == NULL) {
   1140 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1141 		goto out;
   1142 	}
   1143 
   1144 	if (cs->vp->v_type != VDIR) {
   1145 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1146 		goto out;
   1147 	}
   1148 
   1149 	/*
   1150 	 * Verify the component name. If failed, error out, but
   1151 	 * do not error out if the component name is a "..".
   1152 	 * SECINFO will return its parents secinfo data for SECINFO "..".
   1153 	 */
   1154 	if (!utf8_dir_verify(utfnm)) {
   1155 		if (utfnm->utf8string_len != 2 ||
   1156 		    utfnm->utf8string_val[0] != '.' ||
   1157 		    utfnm->utf8string_val[1] != '.') {
   1158 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1159 			goto out;
   1160 		}
   1161 	}
   1162 
   1163 	nm = utf8_to_str(utfnm, &len, NULL);
   1164 	if (nm == NULL) {
   1165 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1166 		goto out;
   1167 	}
   1168 
   1169 	if (len > MAXNAMELEN) {
   1170 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1171 		kmem_free(nm, len);
   1172 		goto out;
   1173 	}
   1174 	/* If necessary, convert to UTF-8 for illbehaved clients */
   1175 
   1176 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   1177 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   1178 	    MAXPATHLEN  + 1);
   1179 
   1180 	if (name == NULL) {
   1181 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1182 		kmem_free(nm, len);
   1183 		goto out;
   1184 	}
   1185 
   1186 
   1187 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
   1188 
   1189 	if (name != nm)
   1190 		kmem_free(name, MAXPATHLEN + 1);
   1191 	kmem_free(nm, len);
   1192 
   1193 out:
   1194 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
   1195 	    SECINFO4res *, resp);
   1196 }
   1197 
   1198 /*
   1199  * Free SECINFO result.
   1200  */
   1201 /* ARGSUSED */
   1202 static void
   1203 rfs4_op_secinfo_free(nfs_resop4 *resop)
   1204 {
   1205 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
   1206 	int count, i;
   1207 	secinfo4 *resok_val;
   1208 
   1209 	/* If this is not an Ok result, nothing to free. */
   1210 	if (resp->status != NFS4_OK) {
   1211 		return;
   1212 	}
   1213 
   1214 	count = resp->SECINFO4resok_len;
   1215 	resok_val = resp->SECINFO4resok_val;
   1216 
   1217 	for (i = 0; i < count; i++) {
   1218 		if (resok_val[i].flavor == RPCSEC_GSS) {
   1219 			rpcsec_gss_info *info;
   1220 
   1221 			info = &resok_val[i].flavor_info;
   1222 			kmem_free(info->oid.sec_oid4_val,
   1223 			    info->oid.sec_oid4_len);
   1224 		}
   1225 	}
   1226 	kmem_free(resok_val, count * sizeof (secinfo4));
   1227 	resp->SECINFO4resok_len = 0;
   1228 	resp->SECINFO4resok_val = NULL;
   1229 }
   1230 
/*
 * ACCESS: determine which of the requested access rights the caller's
 * credential has on the current filehandle.
 *
 * For every requested right that is evaluated, the matching bit is set
 * in resp->supported; it is additionally set in resp->access when
 * VOP_ACCESS() succeeds and, where checked, the file is not subject to
 * mandatory locking and the label check passes.  On a labeled system
 * read/lookup/execute require the client label to dominate the server
 * label, modify/extend/delete require the labels to be equal, and a
 * client at admin_low skips the label comparison altogether.
 *
 * Write-type rights (MODIFY, EXTEND, DELETE) are neither evaluated nor
 * reported as supported for regular files and directories on a
 * read-only export.
 */
/* ARGSUSED */
static void
rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
	int error;
	vnode_t *vp;
	struct vattr va;
	int checkwriteperm;
	cred_t *cr = cs->cr;
	/*
	 * The label variables below are only assigned on a labeled system
	 * (and slabel/tslabel only for a non-admin_low client).  Every use
	 * further down is guarded accordingly by short-circuit evaluation.
	 */
	bslabel_t *clabel, *slabel;
	ts_label_t *tslabel;
	boolean_t admin_low_client;

	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
	    ACCESS4args *, args);

#if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
#endif
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	ASSERT(cr != NULL);

	vp = cs->vp;

	/*
	 * If the file system is exported read only, it is not appropriate
	 * to check write permissions for regular files and directories.
	 * Special files are interpreted by the client, so the underlying
	 * permissions are sent back to the client for interpretation.
	 */
	if (rdonly4(cs->exi, cs->vp, req) &&
	    (vp->v_type == VREG || vp->v_type == VDIR))
		checkwriteperm = 0;
	else
		checkwriteperm = 1;

	/*
	 * XXX
	 * We need the mode so that we can correctly determine access
	 * permissions relative to a mandatory lock file.  Access to
	 * mandatory lock files is denied on the server, so it might
	 * as well be reflected to the client during the open.
	 */
	va.va_mask = AT_MODE;
	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}
	resp->access = 0;
	resp->supported = 0;

	if (is_system_labeled()) {
		ASSERT(req->rq_label != NULL);
		clabel = req->rq_label;
		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
		    "got client label from request(1)",
		    struct svc_req *, req);
		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			/*
			 * The label obtained here is released with
			 * label_rele() just before the success return.
			 */
			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
				*cs->statusp = resp->status = puterrno4(EACCES);
				goto out;
			}
			slabel = label2bslabel(tslabel);
			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
			    char *, "got server label(1) for vp(2)",
			    bslabel_t *, slabel, vnode_t *, vp);

			admin_low_client = B_FALSE;
		} else
			admin_low_client = B_TRUE;
	}

	/* READ: needs VREAD, no mandatory locking, client label dominates */
	if (args->access & ACCESS4_READ) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_READ;
		resp->supported |= ACCESS4_READ;
	}
	/* LOOKUP: directories only, needs VEXEC */
	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
		if (!error && (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_LOOKUP;
		resp->supported |= ACCESS4_LOOKUP;
	}
	/*
	 * MODIFY/EXTEND: one VWRITE check covers both; only the bits the
	 * client actually asked for are granted, but both are reported
	 * as supported.
	 */
	if (checkwriteperm &&
	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    blequal(clabel, slabel)))
			resp->access |=
			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
		resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
	}

	/* DELETE: directories only, needs VWRITE on the directory */
	if (checkwriteperm &&
	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
		if (!error && (!is_system_labeled() || admin_low_client ||
		    blequal(clabel, slabel)))
			resp->access |= ACCESS4_DELETE;
		resp->supported |= ACCESS4_DELETE;
	}
	/* EXECUTE: non-directories only, needs VEXEC */
	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_EXECUTE;
		resp->supported |= ACCESS4_EXECUTE;
	}

	/* tslabel was only obtained for a labeled, non-admin_low client */
	if (is_system_labeled() && !admin_low_client)
		label_rele(tslabel);

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
	    ACCESS4res *, resp);
}
   1365 
   1366 /* ARGSUSED */
   1367 static void
   1368 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1369     struct compound_state *cs)
   1370 {
   1371 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
   1372 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
   1373 	int error;
   1374 	vnode_t *vp = cs->vp;
   1375 	cred_t *cr = cs->cr;
   1376 	vattr_t va;
   1377 
   1378 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
   1379 	    COMMIT4args *, args);
   1380 
   1381 	if (vp == NULL) {
   1382 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1383 		goto out;
   1384 	}
   1385 	if (cs->access == CS_ACCESS_DENIED) {
   1386 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1387 		goto out;
   1388 	}
   1389 
   1390 	if (args->offset + args->count < args->offset) {
   1391 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1392 		goto out;
   1393 	}
   1394 
   1395 	va.va_mask = AT_UID;
   1396 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
   1397 
   1398 	/*
   1399 	 * If we can't get the attributes, then we can't do the
   1400 	 * right access checking.  So, we'll fail the request.
   1401 	 */
   1402 	if (error) {
   1403 		*cs->statusp = resp->status = puterrno4(error);
   1404 		goto out;
   1405 	}
   1406 	if (rdonly4(cs->exi, cs->vp, req)) {
   1407 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1408 		goto out;
   1409 	}
   1410 
   1411 	if (vp->v_type != VREG) {
   1412 		if (vp->v_type == VDIR)
   1413 			resp->status = NFS4ERR_ISDIR;
   1414 		else
   1415 			resp->status = NFS4ERR_INVAL;
   1416 		*cs->statusp = resp->status;
   1417 		goto out;
   1418 	}
   1419 
   1420 	if (crgetuid(cr) != va.va_uid &&
   1421 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
   1422 		*cs->statusp = resp->status = puterrno4(error);
   1423 		goto out;
   1424 	}
   1425 
   1426 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
   1427 	if (!error)
   1428 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
   1429 
   1430 	if (error) {
   1431 		*cs->statusp = resp->status = puterrno4(error);
   1432 		goto out;
   1433 	}
   1434 
   1435 	*cs->statusp = resp->status = NFS4_OK;
   1436 	resp->writeverf = Write4verf;
   1437 out:
   1438 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
   1439 	    COMMIT4res *, resp);
   1440 }
   1441 
   1442 /*
   1443  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
   1444  * was completed. It does the nfsv4 create for special files.
   1445  */
   1446 /* ARGSUSED */
   1447 static vnode_t *
   1448 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
   1449     struct compound_state *cs, vattr_t *vap, char *nm)
   1450 {
   1451 	int error;
   1452 	cred_t *cr = cs->cr;
   1453 	vnode_t *dvp = cs->vp;
   1454 	vnode_t *vp = NULL;
   1455 	int mode;
   1456 	enum vcexcl excl;
   1457 
   1458 	switch (args->type) {
   1459 	case NF4CHR:
   1460 	case NF4BLK:
   1461 		if (secpolicy_sys_devices(cr) != 0) {
   1462 			*cs->statusp = resp->status = NFS4ERR_PERM;
   1463 			return (NULL);
   1464 		}
   1465 		if (args->type == NF4CHR)
   1466 			vap->va_type = VCHR;
   1467 		else
   1468 			vap->va_type = VBLK;
   1469 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
   1470 		    args->ftype4_u.devdata.specdata2);
   1471 		vap->va_mask |= AT_RDEV;
   1472 		break;
   1473 	case NF4SOCK:
   1474 		vap->va_type = VSOCK;
   1475 		break;
   1476 	case NF4FIFO:
   1477 		vap->va_type = VFIFO;
   1478 		break;
   1479 	default:
   1480 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1481 		return (NULL);
   1482 	}
   1483 
   1484 	/*
   1485 	 * Must specify the mode.
   1486 	 */
   1487 	if (!(vap->va_mask & AT_MODE)) {
   1488 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1489 		return (NULL);
   1490 	}
   1491 
   1492 	excl = EXCL;
   1493 
   1494 	mode = 0;
   1495 
   1496 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
   1497 	if (error) {
   1498 		*cs->statusp = resp->status = puterrno4(error);
   1499 		return (NULL);
   1500 	}
   1501 	return (vp);
   1502 }
   1503 
   1504 /*
   1505  * nfsv4 create is used to create non-regular files. For regular files,
   1506  * use nfsv4 open.
   1507  */
   1508 /* ARGSUSED */
   1509 static void
   1510 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1511     struct compound_state *cs)
   1512 {
   1513 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
   1514 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
   1515 	int error;
   1516 	struct vattr bva, iva, iva2, ava, *vap;
   1517 	cred_t *cr = cs->cr;
   1518 	vnode_t *dvp = cs->vp;
   1519 	vnode_t *vp = NULL;
   1520 	vnode_t *realvp;
   1521 	char *nm, *lnm;
   1522 	uint_t len, llen;
   1523 	int syncval = 0;
   1524 	struct nfs4_svgetit_arg sarg;
   1525 	struct nfs4_ntov_table ntov;
   1526 	struct statvfs64 sb;
   1527 	nfsstat4 status;
   1528 	struct sockaddr *ca;
   1529 	char *name = NULL;
   1530 	char *lname = NULL;
   1531 
   1532 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
   1533 	    CREATE4args *, args);
   1534 
   1535 	resp->attrset = 0;
   1536 
   1537 	if (dvp == NULL) {
   1538 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1539 		goto out;
   1540 	}
   1541 
   1542 	/*
   1543 	 * If there is an unshared filesystem mounted on this vnode,
   1544 	 * do not allow to create an object in this directory.
   1545 	 */
   1546 	if (vn_ismntpt(dvp)) {
   1547 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1548 		goto out;
   1549 	}
   1550 
   1551 	/* Verify that type is correct */
   1552 	switch (args->type) {
   1553 	case NF4LNK:
   1554 	case NF4BLK:
   1555 	case NF4CHR:
   1556 	case NF4SOCK:
   1557 	case NF4FIFO:
   1558 	case NF4DIR:
   1559 		break;
   1560 	default:
   1561 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1562 		goto out;
   1563 	};
   1564 
   1565 	if (cs->access == CS_ACCESS_DENIED) {
   1566 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1567 		goto out;
   1568 	}
   1569 	if (dvp->v_type != VDIR) {
   1570 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1571 		goto out;
   1572 	}
   1573 	if (!utf8_dir_verify(&args->objname)) {
   1574 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1575 		goto out;
   1576 	}
   1577 
   1578 	if (rdonly4(cs->exi, cs->vp, req)) {
   1579 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1580 		goto out;
   1581 	}
   1582 
   1583 	/*
   1584 	 * Name of newly created object
   1585 	 */
   1586 	nm = utf8_to_fn(&args->objname, &len, NULL);
   1587 	if (nm == NULL) {
   1588 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1589 		goto out;
   1590 	}
   1591 
   1592 	if (len > MAXNAMELEN) {
   1593 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1594 		kmem_free(nm, len);
   1595 		goto out;
   1596 	}
   1597 
   1598 	/* If necessary, convert to UTF-8 for poorly behaved clients */
   1599 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   1600 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   1601 	    MAXPATHLEN  + 1);
   1602 
   1603 	if (name == NULL) {
   1604 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1605 		kmem_free(nm, len);
   1606 		goto out;
   1607 	}
   1608 
   1609 	resp->attrset = 0;
   1610 
   1611 	sarg.sbp = &sb;
   1612 	nfs4_ntov_table_init(&ntov);
   1613 
   1614 	status = do_rfs4_set_attrs(&resp->attrset,
   1615 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
   1616 
   1617 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
   1618 		status = NFS4ERR_INVAL;
   1619 
   1620 	if (status != NFS4_OK) {
   1621 		*cs->statusp = resp->status = status;
   1622 		kmem_free(nm, len);
   1623 		nfs4_ntov_table_free(&ntov, &sarg);
   1624 		resp->attrset = 0;
   1625 		goto out;
   1626 	}
   1627 
   1628 	/* Get "before" change value */
   1629 	bva.va_mask = AT_CTIME|AT_SEQ;
   1630 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
   1631 	if (error) {
   1632 		*cs->statusp = resp->status = puterrno4(error);
   1633 		kmem_free(nm, len);
   1634 		nfs4_ntov_table_free(&ntov, &sarg);
   1635 		resp->attrset = 0;
   1636 		goto out;
   1637 	}
   1638 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
   1639 
   1640 	vap = sarg.vap;
   1641 
   1642 	/*
   1643 	 * Set default initial values for attributes when not specified
   1644 	 * in createattrs.
   1645 	 */
   1646 	if ((vap->va_mask & AT_UID) == 0) {
   1647 		vap->va_uid = crgetuid(cr);
   1648 		vap->va_mask |= AT_UID;
   1649 	}
   1650 	if ((vap->va_mask & AT_GID) == 0) {
   1651 		vap->va_gid = crgetgid(cr);
   1652 		vap->va_mask |= AT_GID;
   1653 	}
   1654 
   1655 	vap->va_mask |= AT_TYPE;
   1656 	switch (args->type) {
   1657 	case NF4DIR:
   1658 		vap->va_type = VDIR;
   1659 		if ((vap->va_mask & AT_MODE) == 0) {
   1660 			vap->va_mode = 0700;	/* default: owner rwx only */
   1661 			vap->va_mask |= AT_MODE;
   1662 		}
   1663 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr, NULL, 0, NULL);
   1664 		if (error)
   1665 			break;
   1666 
   1667 		/*
   1668 		 * Get the initial "after" sequence number, if it fails,
   1669 		 * set to zero
   1670 		 */
   1671 		iva.va_mask = AT_SEQ;
   1672 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1673 			iva.va_seq = 0;
   1674 		break;
   1675 	case NF4LNK:
   1676 		vap->va_type = VLNK;
   1677 		if ((vap->va_mask & AT_MODE) == 0) {
   1678 			vap->va_mode = 0700;	/* default: owner rwx only */
   1679 			vap->va_mask |= AT_MODE;
   1680 		}
   1681 
   1682 		/*
   1683 		 * symlink names must be treated as data
   1684 		 */
   1685 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
   1686 
   1687 		if (lnm == NULL) {
   1688 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1689 			if (name != nm)
   1690 				kmem_free(name, MAXPATHLEN + 1);
   1691 			kmem_free(nm, len);
   1692 			nfs4_ntov_table_free(&ntov, &sarg);
   1693 			resp->attrset = 0;
   1694 			goto out;
   1695 		}
   1696 
   1697 		if (llen > MAXPATHLEN) {
   1698 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1699 			if (name != nm)
   1700 				kmem_free(name, MAXPATHLEN + 1);
   1701 			kmem_free(nm, len);
   1702 			kmem_free(lnm, llen);
   1703 			nfs4_ntov_table_free(&ntov, &sarg);
   1704 			resp->attrset = 0;
   1705 			goto out;
   1706 		}
   1707 
   1708 		lname = nfscmd_convname(ca, cs->exi, lnm,
   1709 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
   1710 
   1711 		if (lname == NULL) {
   1712 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   1713 			if (name != nm)
   1714 				kmem_free(name, MAXPATHLEN + 1);
   1715 			kmem_free(nm, len);
   1716 			kmem_free(lnm, llen);
   1717 			nfs4_ntov_table_free(&ntov, &sarg);
   1718 			resp->attrset = 0;
   1719 			goto out;
   1720 		}
   1721 
   1722 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, NULL, 0);
   1723 		if (lname != lnm)
   1724 			kmem_free(lname, MAXPATHLEN + 1);
   1725 		if (lnm != NULL)
   1726 			kmem_free(lnm, llen);
   1727 		if (error)
   1728 			break;
   1729 
   1730 		/*
   1731 		 * Get the initial "after" sequence number, if it fails,
   1732 		 * set to zero
   1733 		 */
   1734 		iva.va_mask = AT_SEQ;
   1735 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1736 			iva.va_seq = 0;
   1737 
   1738 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr,
   1739 		    NULL, NULL, NULL);
   1740 		if (error)
   1741 			break;
   1742 
   1743 		/*
   1744 		 * va_seq is not safe over VOP calls, check it again
   1745 		 * if it has changed zero out iva to force atomic = FALSE.
   1746 		 */
   1747 		iva2.va_mask = AT_SEQ;
   1748 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
   1749 		    iva2.va_seq != iva.va_seq)
   1750 			iva.va_seq = 0;
   1751 		break;
   1752 	default:
   1753 		/*
   1754 		 * probably a special file.
   1755 		 */
   1756 		if ((vap->va_mask & AT_MODE) == 0) {
   1757 			vap->va_mode = 0600;	/* default: owner rw only */
   1758 			vap->va_mask |= AT_MODE;
   1759 		}
   1760 		syncval = FNODSYNC;
   1761 		/*
   1762 		 * We know this will only generate one VOP call
   1763 		 */
   1764 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
   1765 
   1766 		if (vp == NULL) {
   1767 			if (name != nm)
   1768 				kmem_free(name, MAXPATHLEN + 1);
   1769 			kmem_free(nm, len);
   1770 			nfs4_ntov_table_free(&ntov, &sarg);
   1771 			resp->attrset = 0;
   1772 			goto out;
   1773 		}
   1774 
   1775 		/*
   1776 		 * Get the initial "after" sequence number, if it fails,
   1777 		 * set to zero
   1778 		 */
   1779 		iva.va_mask = AT_SEQ;
   1780 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1781 			iva.va_seq = 0;
   1782 
   1783 		break;
   1784 	}
   1785 	if (name != nm)
   1786 		kmem_free(name, MAXPATHLEN + 1);
   1787 	kmem_free(nm, len);
   1788 
   1789 	if (error) {
   1790 		*cs->statusp = resp->status = puterrno4(error);
   1791 	}
   1792 
   1793 	/*
   1794 	 * Force modified data and metadata out to stable storage.
   1795 	 */
   1796 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
   1797 
   1798 	if (resp->status != NFS4_OK) {
   1799 		if (vp != NULL)
   1800 			VN_RELE(vp);
   1801 		nfs4_ntov_table_free(&ntov, &sarg);
   1802 		resp->attrset = 0;
   1803 		goto out;
   1804 	}
   1805 
   1806 	/*
   1807 	 * Finish setup of cinfo response, "before" value already set.
   1808 	 * Get "after" change value, if it fails, simply return the
   1809 	 * before value.
   1810 	 */
   1811 	ava.va_mask = AT_CTIME|AT_SEQ;
   1812 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
   1813 		ava.va_ctime = bva.va_ctime;
   1814 		ava.va_seq = 0;
   1815 	}
   1816 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
   1817 
   1818 	/*
   1819 	 * True verification that object was created with correct
   1820 	 * attrs is impossible.  The attrs could have been changed
   1821 	 * immediately after object creation.  If attributes did
   1822 	 * not verify, the only recourse for the server is to
   1823 	 * destroy the object.  Maybe if some attrs (like gid)
   1824 	 * are set incorrectly, the object should be destroyed;
   1825 	 * however, seems bad as a default policy.  Do we really
   1826 	 * want to destroy an object over one of the times not
   1827 	 * verifying correctly?  For these reasons, the server
   1828 	 * currently sets bits in attrset for createattrs
   1829 	 * that were set; however, no verification is done.
   1830 	 *
   1831 	 * vmask_to_nmask accounts for vattr bits set on create
   1832 	 *	[do_rfs4_set_attrs() only sets resp bits for
   1833 	 *	 non-vattr/vfs bits.]
   1834 	 * Mask off any bits set by default so as not to return
   1835 	 * more attrset bits than were requested in createattrs
   1836 	 */
   1837 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
   1838 	resp->attrset &= args->createattrs.attrmask;
   1839 	nfs4_ntov_table_free(&ntov, &sarg);
   1840 
   1841 	error = makefh4(&cs->fh, vp, cs->exi);
   1842 	if (error) {
   1843 		*cs->statusp = resp->status = puterrno4(error);
   1844 	}
   1845 
   1846 	/*
   1847 	 * The cinfo.atomic = TRUE only if we got no errors, we have
   1848 	 * non-zero va_seq's, and it has incremented by exactly one
   1849 	 * during the creation and it didn't change during the VOP_LOOKUP
   1850 	 * or VOP_FSYNC.
   1851 	 */
   1852 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
   1853 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
   1854 		resp->cinfo.atomic = TRUE;
   1855 	else
   1856 		resp->cinfo.atomic = FALSE;
   1857 
   1858 	/*
   1859 	 * Force modified metadata out to stable storage.
   1860 	 *
   1861 	 * if a underlying vp exists, pass it to VOP_FSYNC
   1862 	 */
   1863 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
   1864 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
   1865 	else
   1866 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
   1867 
   1868 	if (resp->status != NFS4_OK) {
   1869 		VN_RELE(vp);
   1870 		goto out;
   1871 	}
   1872 	if (cs->vp)
   1873 		VN_RELE(cs->vp);
   1874 
   1875 	cs->vp = vp;
   1876 	*cs->statusp = resp->status = NFS4_OK;
   1877 out:
   1878 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
   1879 	    CREATE4res *, resp);
   1880 }
   1881 
   1882 /*ARGSUSED*/
   1883 static void
   1884 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1885     struct compound_state *cs)
   1886 {
   1887 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
   1888 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
   1889 
   1890 	rfs4_op_inval(argop, resop, req, cs);
   1891 
   1892 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
   1893 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
   1894 }
   1895 
   1896 /*ARGSUSED*/
   1897 static void
   1898 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1899     struct compound_state *cs)
   1900 {
   1901 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
   1902 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
   1903 	rfs4_deleg_state_t *dsp;
   1904 	nfsstat4 status;
   1905 
   1906 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
   1907 	    DELEGRETURN4args *, args);
   1908 
   1909 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
   1910 	resp->status = *cs->statusp = status;
   1911 	if (status != NFS4_OK)
   1912 		goto out;
   1913 
   1914 	/* Ensure specified filehandle matches */
   1915 	if (cs->vp != dsp->rds_finfo->rf_vp) {
   1916 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
   1917 	} else
   1918 		rfs4_return_deleg(dsp, FALSE);
   1919 
   1920 	rfs4_update_lease(dsp->rds_client);
   1921 
   1922 	rfs4_deleg_state_rele(dsp);
   1923 out:
   1924 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
   1925 	    DELEGRETURN4res *, resp);
   1926 }
   1927 
   1928 /*
   1929  * Check to see if a given "flavor" is an explicitly shared flavor.
   1930  * The assumption of this routine is the "flavor" is already a valid
   1931  * flavor in the secinfo list of "exi".
   1932  *
   1933  *	e.g.
   1934  *		# share -o sec=flavor1 /export
   1935  *		# share -o sec=flavor2 /export/home
   1936  *
   1937  *		flavor2 is not an explicitly shared flavor for /export,
   1938  *		however it is in the secinfo list for /export thru the
   1939  *		server namespace setup.
   1940  */
   1941 int
   1942 is_exported_sec(int flavor, struct exportinfo *exi)
   1943 {
   1944 	int	i;
   1945 	struct secinfo *sp;
   1946 
   1947 	sp = exi->exi_export.ex_secinfo;
   1948 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
   1949 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
   1950 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
   1951 			return (SEC_REF_EXPORTED(&sp[i]));
   1952 		}
   1953 	}
   1954 
   1955 	/* Should not reach this point based on the assumption */
   1956 	return (0);
   1957 }
   1958 
   1959 /*
   1960  * Check if the security flavor used in the request matches what is
   1961  * required at the export point or at the root pseudo node (exi_root).
   1962  *
   1963  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
   1964  *
   1965  */
   1966 static int
   1967 secinfo_match_or_authnone(struct compound_state *cs)
   1968 {
   1969 	int	i;
   1970 	struct secinfo *sp;
   1971 
   1972 	/*
   1973 	 * Check cs->nfsflavor (from the request) against
   1974 	 * the current export data in cs->exi.
   1975 	 */
   1976 	sp = cs->exi->exi_export.ex_secinfo;
   1977 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
   1978 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
   1979 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
   1980 			return (1);
   1981 	}
   1982 
   1983 	return (0);
   1984 }
   1985 
   1986 /*
   1987  * Check the access authority for the client and return the correct error.
   1988  */
   1989 nfsstat4
   1990 call_checkauth4(struct compound_state *cs, struct svc_req *req)
   1991 {
   1992 	int	authres;
   1993 
   1994 	/*
   1995 	 * First, check if the security flavor used in the request
   1996 	 * are among the flavors set in the server namespace.
   1997 	 */
   1998 	if (!secinfo_match_or_authnone(cs)) {
   1999 		*cs->statusp = NFS4ERR_WRONGSEC;
   2000 		return (*cs->statusp);
   2001 	}
   2002 
   2003 	authres = checkauth4(cs, req);
   2004 
   2005 	if (authres > 0) {
   2006 		*cs->statusp = NFS4_OK;
   2007 		if (! (cs->access & CS_ACCESS_LIMITED))
   2008 			cs->access = CS_ACCESS_OK;
   2009 	} else if (authres == 0) {
   2010 		*cs->statusp = NFS4ERR_ACCESS;
   2011 	} else if (authres == -2) {
   2012 		*cs->statusp = NFS4ERR_WRONGSEC;
   2013 	} else {
   2014 		*cs->statusp = NFS4ERR_DELAY;
   2015 	}
   2016 	return (*cs->statusp);
   2017 }
   2018 
   2019 /*
   2020  * bitmap4_to_attrmask is called by getattr and readdir.
   2021  * It sets up the vattr mask and determines whether vfsstat call is needed
   2022  * based on the input bitmap.
   2023  * Returns nfsv4 status.
   2024  */
   2025 static nfsstat4
   2026 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
   2027 {
   2028 	int i;
   2029 	uint_t	va_mask;
   2030 	struct statvfs64 *sbp = sargp->sbp;
   2031 
   2032 	sargp->sbp = NULL;
   2033 	sargp->flag = 0;
   2034 	sargp->rdattr_error = NFS4_OK;
   2035 	sargp->mntdfid_set = FALSE;
   2036 	if (sargp->cs->vp)
   2037 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
   2038 		    FH4_ATTRDIR | FH4_NAMEDATTR);
   2039 	else
   2040 		sargp->xattr = 0;
   2041 
   2042 	/*
   2043 	 * Set rdattr_error_req to true if return error per
   2044 	 * failed entry rather than fail the readdir.
   2045 	 */
   2046 	if (breq & FATTR4_RDATTR_ERROR_MASK)
   2047 		sargp->rdattr_error_req = 1;
   2048 	else
   2049 		sargp->rdattr_error_req = 0;
   2050 
   2051 	/*
   2052 	 * generate the va_mask
   2053 	 * Handle the easy cases first
   2054 	 */
   2055 	switch (breq) {
   2056 	case NFS4_NTOV_ATTR_MASK:
   2057 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
   2058 		return (NFS4_OK);
   2059 
   2060 	case NFS4_FS_ATTR_MASK:
   2061 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
   2062 		sargp->sbp = sbp;
   2063 		return (NFS4_OK);
   2064 
   2065 	case NFS4_NTOV_ATTR_CACHE_MASK:
   2066 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
   2067 		return (NFS4_OK);
   2068 
   2069 	case FATTR4_LEASE_TIME_MASK:
   2070 		sargp->vap->va_mask = 0;
   2071 		return (NFS4_OK);
   2072 
   2073 	default:
   2074 		va_mask = 0;
   2075 		for (i = 0; i < nfs4_ntov_map_size; i++) {
   2076 			if ((breq & nfs4_ntov_map[i].fbit) &&
   2077 			    nfs4_ntov_map[i].vbit)
   2078 				va_mask |= nfs4_ntov_map[i].vbit;
   2079 		}
   2080 
   2081 		/*
   2082 		 * Check is vfsstat is needed
   2083 		 */
   2084 		if (breq & NFS4_FS_ATTR_MASK)
   2085 			sargp->sbp = sbp;
   2086 
   2087 		sargp->vap->va_mask = va_mask;
   2088 		return (NFS4_OK);
   2089 	}
   2090 	/* NOTREACHED */
   2091 }
   2092 
   2093 /*
   2094  * bitmap4_get_sysattrs is called by getattr and readdir.
   2095  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
   2096  * Returns nfsv4 status.
   2097  */
   2098 static nfsstat4
   2099 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
   2100 {
   2101 	int error;
   2102 	struct compound_state *cs = sargp->cs;
   2103 	vnode_t *vp = cs->vp;
   2104 
   2105 	if (sargp->sbp != NULL) {
   2106 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
   2107 			sargp->sbp = NULL;	/* to identify error */
   2108 			return (puterrno4(error));
   2109 		}
   2110 	}
   2111 
   2112 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
   2113 }
   2114 
   2115 static void
   2116 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
   2117 {
   2118 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
   2119 	    KM_SLEEP);
   2120 	ntovp->attrcnt = 0;
   2121 	ntovp->vfsstat = FALSE;
   2122 }
   2123 
   2124 static void
   2125 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
   2126     struct nfs4_svgetit_arg *sargp)
   2127 {
   2128 	int i;
   2129 	union nfs4_attr_u *na;
   2130 	uint8_t *amap;
   2131 
   2132 	/*
   2133 	 * XXX Should do the same checks for whether the bit is set
   2134 	 */
   2135 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
   2136 	    i < ntovp->attrcnt; i++, na++, amap++) {
   2137 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
   2138 		    NFS4ATTR_FREEIT, sargp, na);
   2139 	}
   2140 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
   2141 		/*
   2142 		 * xdr_free for getattr will be done later
   2143 		 */
   2144 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
   2145 		    i < ntovp->attrcnt; i++, na++, amap++) {
   2146 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
   2147 		}
   2148 	}
   2149 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
   2150 }
   2151 
   2152 /*
   2153  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
   2154  */
   2155 static nfsstat4
   2156 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
   2157     struct nfs4_svgetit_arg *sargp)
   2158 {
   2159 	int error = 0;
   2160 	int i, k;
   2161 	struct nfs4_ntov_table ntov;
   2162 	XDR xdr;
   2163 	ulong_t xdr_size;
   2164 	char *xdr_attrs;
   2165 	nfsstat4 status = NFS4_OK;
   2166 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
   2167 	union nfs4_attr_u *na;
   2168 	uint8_t *amap;
   2169 
   2170 	sargp->op = NFS4ATTR_GETIT;
   2171 	sargp->flag = 0;
   2172 
   2173 	fattrp->attrmask = 0;
   2174 	/* if no bits requested, then return empty fattr4 */
   2175 	if (breq == 0) {
   2176 		fattrp->attrlist4_len = 0;
   2177 		fattrp->attrlist4 = NULL;
   2178 		return (NFS4_OK);
   2179 	}
   2180 
   2181 	/*
   2182 	 * return NFS4ERR_INVAL when client requests write-only attrs
   2183 	 */
   2184 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
   2185 		return (NFS4ERR_INVAL);
   2186 
   2187 	nfs4_ntov_table_init(&ntov);
   2188 	na = ntov.na;
   2189 	amap = ntov.amap;
   2190 
   2191 	/*
   2192 	 * Now loop to get or verify the attrs
   2193 	 */
   2194 	for (i = 0; i < nfs4_ntov_map_size; i++) {
   2195 		if (breq & nfs4_ntov_map[i].fbit) {
   2196 			if ((*nfs4_ntov_map[i].sv_getit)(
   2197 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
   2198 
   2199 				error = (*nfs4_ntov_map[i].sv_getit)(
   2200 				    NFS4ATTR_GETIT, sargp, na);
   2201 
   2202 				/*
   2203 				 * Possible error values:
   2204 				 * >0 if sv_getit failed to
   2205 				 * get the attr; 0 if succeeded;
   2206 				 * <0 if rdattr_error and the
   2207 				 * attribute cannot be returned.
   2208 				 */
   2209 				if (error && !(sargp->rdattr_error_req))
   2210 					goto done;
   2211 				/*
   2212 				 * If error then just for entry
   2213 				 */
   2214 				if (error == 0) {
   2215 					fattrp->attrmask |=
   2216 					    nfs4_ntov_map[i].fbit;
   2217 					*amap++ =
   2218 					    (uint8_t)nfs4_ntov_map[i].nval;
   2219 					na++;
   2220 					(ntov.attrcnt)++;
   2221 				} else if ((error > 0) &&
   2222 				    (sargp->rdattr_error == NFS4_OK)) {
   2223 					sargp->rdattr_error = puterrno4(error);
   2224 				}
   2225 				error = 0;
   2226 			}
   2227 		}
   2228 	}
   2229 
   2230 	/*
   2231 	 * If rdattr_error was set after the return value for it was assigned,
   2232 	 * update it.
   2233 	 */
   2234 	if (prev_rdattr_error != sargp->rdattr_error) {
   2235 		na = ntov.na;
   2236 		amap = ntov.amap;
   2237 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2238 			k = *amap;
   2239 			if (k < FATTR4_RDATTR_ERROR) {
   2240 				continue;
   2241 			}
   2242 			if ((k == FATTR4_RDATTR_ERROR) &&
   2243 			    ((*nfs4_ntov_map[k].sv_getit)(
   2244 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
   2245 
   2246 				(void) (*nfs4_ntov_map[k].sv_getit)(
   2247 				    NFS4ATTR_GETIT, sargp, na);
   2248 			}
   2249 			break;
   2250 		}
   2251 	}
   2252 
   2253 	xdr_size = 0;
   2254 	na = ntov.na;
   2255 	amap = ntov.amap;
   2256 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2257 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
   2258 	}
   2259 
   2260 	fattrp->attrlist4_len = xdr_size;
   2261 	if (xdr_size) {
   2262 		/* freed by rfs4_op_getattr_free() */
   2263 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
   2264 
   2265 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
   2266 
   2267 		na = ntov.na;
   2268 		amap = ntov.amap;
   2269 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2270 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
   2271 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
   2272 				    int, *amap);
   2273 				status = NFS4ERR_SERVERFAULT;
   2274 				break;
   2275 			}
   2276 		}
   2277 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
   2278 	} else {
   2279 		fattrp->attrlist4 = NULL;
   2280 	}
   2281 done:
   2282 
   2283 	nfs4_ntov_table_free(&ntov, sargp);
   2284 
   2285 	if (error != 0)
   2286 		status = puterrno4(error);
   2287 
   2288 	return (status);
   2289 }
   2290 
   2291 /* ARGSUSED */
   2292 static void
   2293 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2294     struct compound_state *cs)
   2295 {
   2296 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
   2297 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   2298 	struct nfs4_svgetit_arg sarg;
   2299 	struct statvfs64 sb;
   2300 	nfsstat4 status;
   2301 
   2302 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
   2303 	    GETATTR4args *, args);
   2304 
   2305 	if (cs->vp == NULL) {
   2306 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2307 		goto out;
   2308 	}
   2309 
   2310 	if (cs->access == CS_ACCESS_DENIED) {
   2311 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2312 		goto out;
   2313 	}
   2314 
   2315 	sarg.sbp = &sb;
   2316 	sarg.cs = cs;
   2317 
   2318 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
   2319 	if (status == NFS4_OK) {
   2320 		status = bitmap4_get_sysattrs(&sarg);
   2321 		if (status == NFS4_OK)
   2322 			status = do_rfs4_op_getattr(args->attr_request,
   2323 			    &resp->obj_attributes, &sarg);
   2324 	}
   2325 	*cs->statusp = resp->status = status;
   2326 out:
   2327 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
   2328 	    GETATTR4res *, resp);
   2329 }
   2330 
   2331 static void
   2332 rfs4_op_getattr_free(nfs_resop4 *resop)
   2333 {
   2334 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   2335 
   2336 	nfs4_fattr4_free(&resp->obj_attributes);
   2337 }
   2338 
   2339 /* ARGSUSED */
   2340 static void
   2341 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2342     struct compound_state *cs)
   2343 {
   2344 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   2345 
   2346 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
   2347 
   2348 	if (cs->vp == NULL) {
   2349 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2350 		goto out;
   2351 	}
   2352 	if (cs->access == CS_ACCESS_DENIED) {
   2353 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2354 		goto out;
   2355 	}
   2356 
   2357 	resp->object.nfs_fh4_val =
   2358 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
   2359 	nfs_fh4_copy(&cs->fh, &resp->object);
   2360 	*cs->statusp = resp->status = NFS4_OK;
   2361 out:
   2362 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
   2363 	    GETFH4res *, resp);
   2364 }
   2365 
   2366 static void
   2367 rfs4_op_getfh_free(nfs_resop4 *resop)
   2368 {
   2369 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   2370 
   2371 	if (resp->status == NFS4_OK &&
   2372 	    resp->object.nfs_fh4_val != NULL) {
   2373 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
   2374 		resp->object.nfs_fh4_val = NULL;
   2375 		resp->object.nfs_fh4_len = 0;
   2376 	}
   2377 }
   2378 
   2379 /*
   2380  * illegal: args: void
   2381  *	    res : status (NFS4ERR_OP_ILLEGAL)
   2382  */
   2383 /* ARGSUSED */
   2384 static void
   2385 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
   2386     struct svc_req *req, struct compound_state *cs)
   2387 {
   2388 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
   2389 
   2390 	resop->resop = OP_ILLEGAL;
   2391 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
   2392 }
   2393 
   2394 /*
   2395  * link: args: SAVED_FH: file, CURRENT_FH: target directory
   2396  *	 res: status. If success - CURRENT_FH unchanged, return change_info
   2397  */
   2398 /* ARGSUSED */
   2399 static void
   2400 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2401     struct compound_state *cs)
   2402 {
   2403 	LINK4args *args = &argop->nfs_argop4_u.oplink;
   2404 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
   2405 	int error;
   2406 	vnode_t *vp;
   2407 	vnode_t *dvp;
   2408 	struct vattr bdva, idva, adva;
   2409 	char *nm;
   2410 	uint_t  len;
   2411 	struct sockaddr *ca;
   2412 	char *name = NULL;
   2413 
   2414 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
   2415 	    LINK4args *, args);
   2416 
   2417 	/* SAVED_FH: source object */
   2418 	vp = cs->saved_vp;
   2419 	if (vp == NULL) {
   2420 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2421 		goto out;
   2422 	}
   2423 
   2424 	/* CURRENT_FH: target directory */
   2425 	dvp = cs->vp;
   2426 	if (dvp == NULL) {
   2427 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2428 		goto out;
   2429 	}
   2430 
   2431 	/*
   2432 	 * If there is a non-shared filesystem mounted on this vnode,
   2433 	 * do not allow to link any file in this directory.
   2434 	 */
   2435 	if (vn_ismntpt(dvp)) {
   2436 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2437 		goto out;
   2438 	}
   2439 
   2440 	if (cs->access == CS_ACCESS_DENIED) {
   2441 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2442 		goto out;
   2443 	}
   2444 
   2445 	/* Check source object's type validity */
   2446 	if (vp->v_type == VDIR) {
   2447 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   2448 		goto out;
   2449 	}
   2450 
   2451 	/* Check target directory's type */
   2452 	if (dvp->v_type != VDIR) {
   2453 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2454 		goto out;
   2455 	}
   2456 
   2457 	if (cs->saved_exi != cs->exi) {
   2458 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   2459 		goto out;
   2460 	}
   2461 
   2462 	if (!utf8_dir_verify(&args->newname)) {
   2463 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2464 		goto out;
   2465 	}
   2466 
   2467 	nm = utf8_to_fn(&args->newname, &len, NULL);
   2468 	if (nm == NULL) {
   2469 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2470 		goto out;
   2471 	}
   2472 
   2473 	if (len > MAXNAMELEN) {
   2474 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2475 		kmem_free(nm, len);
   2476 		goto out;
   2477 	}
   2478 
   2479 	if (rdonly4(cs->exi, cs->vp, req)) {
   2480 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   2481 		kmem_free(nm, len);
   2482 		goto out;
   2483 	}
   2484 
   2485 	/* Get "before" change value */
   2486 	bdva.va_mask = AT_CTIME|AT_SEQ;
   2487 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
   2488 	if (error) {
   2489 		*cs->statusp = resp->status = puterrno4(error);
   2490 		kmem_free(nm, len);
   2491 		goto out;
   2492 	}
   2493 
   2494 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   2495 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   2496 	    MAXPATHLEN  + 1);
   2497 
   2498 	if (name == NULL) {
   2499 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2500 		kmem_free(nm, len);
   2501 		goto out;
   2502 	}
   2503 
   2504 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   2505 
   2506 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
   2507 
   2508 	if (nm != name)
   2509 		kmem_free(name, MAXPATHLEN + 1);
   2510 	kmem_free(nm, len);
   2511 
   2512 	/*
   2513 	 * Get the initial "after" sequence number, if it fails, set to zero
   2514 	 */
   2515 	idva.va_mask = AT_SEQ;
   2516 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
   2517 		idva.va_seq = 0;
   2518 
   2519 	/*
   2520 	 * Force modified data and metadata out to stable storage.
   2521 	 */
   2522 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
   2523 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   2524 
   2525 	if (error) {
   2526 		*cs->statusp = resp->status = puterrno4(error);
   2527 		goto out;
   2528 	}
   2529 
   2530 	/*
   2531 	 * Get "after" change value, if it fails, simply return the
   2532 	 * before value.
   2533 	 */
   2534 	adva.va_mask = AT_CTIME|AT_SEQ;
   2535 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
   2536 		adva.va_ctime = bdva.va_ctime;
   2537 		adva.va_seq = 0;
   2538 	}
   2539 
   2540 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   2541 
   2542 	/*
   2543 	 * The cinfo.atomic = TRUE only if we have
   2544 	 * non-zero va_seq's, and it has incremented by exactly one
   2545 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
   2546 	 */
   2547 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   2548 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
   2549 		resp->cinfo.atomic = TRUE;
   2550 	else
   2551 		resp->cinfo.atomic = FALSE;
   2552 
   2553 	*cs->statusp = resp->status = NFS4_OK;
   2554 out:
   2555 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
   2556 	    LINK4res *, resp);
   2557 }
   2558 
   2559 /*
   2560  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
   2561  */
   2562 
   2563 /* ARGSUSED */
   2564 static nfsstat4
   2565 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
   2566     struct compound_state *cs)
   2567 {
   2568 	int error;
   2569 	int different_export = 0;
   2570 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
   2571 	struct exportinfo *exi = NULL, *pre_exi = NULL;
   2572 	nfsstat4 stat;
   2573 	fid_t fid;
   2574 	int attrdir, dotdot, walk;
   2575 	bool_t is_newvp = FALSE;
   2576 
   2577 	if (cs->vp->v_flag & V_XATTRDIR) {
   2578 		attrdir = 1;
   2579 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
   2580 	} else {
   2581 		attrdir = 0;
   2582 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
   2583 	}
   2584 
   2585 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
   2586 
   2587 	/*
   2588 	 * If dotdotting, then need to check whether it's
   2589 	 * above the root of a filesystem, or above an
   2590 	 * export point.
   2591 	 */
   2592 	if (dotdot) {
   2593 
   2594 		/*
   2595 		 * If dotdotting at the root of a filesystem, then
   2596 		 * need to traverse back to the mounted-on filesystem
   2597 		 * and do the dotdot lookup there.
   2598 		 */
   2599 		if (cs->vp->v_flag & VROOT) {
   2600 
   2601 			/*
   2602 			 * If at the system root, then can
   2603 			 * go up no further.
   2604 			 */
   2605 			if (VN_CMP(cs->vp, rootdir))
   2606 				return (puterrno4(ENOENT));
   2607 
   2608 			/*
   2609 			 * Traverse back to the mounted-on filesystem
   2610 			 */
   2611 			cs->vp = untraverse(cs->vp);
   2612 
   2613 			/*
   2614 			 * Set the different_export flag so we remember
   2615 			 * to pick up a new exportinfo entry for
   2616 			 * this new filesystem.
   2617 			 */
   2618 			different_export = 1;
   2619 		} else {
   2620 
   2621 			/*
   2622 			 * If dotdotting above an export point then set
   2623 			 * the different_export to get new export info.
   2624 			 */
   2625 			different_export = nfs_exported(cs->exi, cs->vp);
   2626 		}
   2627 	}
   2628 
   2629 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
   2630 	    NULL, NULL, NULL);
   2631 	if (error)
   2632 		return (puterrno4(error));
   2633 
   2634 	/*
   2635 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
   2636 	 *
   2637 	 * XXX if the vnode is a symlink and it is not visible in
   2638 	 * a pseudo filesystem, return ENOENT (not following symlink).
   2639 	 * V4 client can not mount such symlink. This is a regression
   2640 	 * from V2/V3.
   2641 	 *
   2642 	 * In the same exported filesystem, if the security flavor used
   2643 	 * is not an explicitly shared flavor, limit the view to the visible
   2644 	 * list entries only. This is not a WRONGSEC case because it's already
   2645 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
   2646 	 */
   2647 	if (!different_export &&
   2648 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
   2649 	    cs->access & CS_ACCESS_LIMITED)) {
   2650 		if (! nfs_visible(cs->exi, vp, &different_export)) {
   2651 			VN_RELE(vp);
   2652 			return (puterrno4(ENOENT));
   2653 		}
   2654 	}
   2655 
   2656 	/*
   2657 	 * If it's a mountpoint, then traverse it.
   2658 	 */
   2659 	if (vn_ismntpt(vp)) {
   2660 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
   2661 		pre_tvp = vp;		/* save pre-traversed vnode	*/
   2662 
   2663 		/*
   2664 		 * hold pre_tvp to counteract rele by traverse.  We will
   2665 		 * need pre_tvp below if checkexport4 fails
   2666 		 */
   2667 		VN_HOLD(pre_tvp);
   2668 		tvp = vp;
   2669 		if ((error = traverse(&tvp)) != 0) {
   2670 			VN_RELE(vp);
   2671 			VN_RELE(pre_tvp);
   2672 			return (puterrno4(error));
   2673 		}
   2674 		vp = tvp;
   2675 		different_export = 1;
   2676 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
   2677 		/*
   2678 		 * The vfsp comparison is to handle the case where
   2679 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
   2680 		 * and NFS is unaware of local fs transistions because
   2681 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
   2682 		 * the dir and the obj returned by lookup will have different
   2683 		 * vfs ptrs.
   2684 		 */
   2685 		different_export = 1;
   2686 	}
   2687 
   2688 	if (different_export) {
   2689 
   2690 		bzero(&fid, sizeof (fid));
   2691 		fid.fid_len = MAXFIDSZ;
   2692 		error = vop_fid_pseudo(vp, &fid);
   2693 		if (error) {
   2694 			VN_RELE(vp);
   2695 			if (pre_tvp)
   2696 				VN_RELE(pre_tvp);
   2697 			return (puterrno4(error));
   2698 		}
   2699 
   2700 		if (dotdot)
   2701 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
   2702 		else
   2703 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
   2704 
   2705 		if (exi == NULL) {
   2706 			if (pre_tvp) {
   2707 				/*
   2708 				 * If this vnode is a mounted-on vnode,
   2709 				 * but the mounted-on file system is not
   2710 				 * exported, send back the filehandle for
   2711 				 * the mounted-on vnode, not the root of
   2712 				 * the mounted-on file system.
   2713 				 */
   2714 				VN_RELE(vp);
   2715 				vp = pre_tvp;
   2716 				exi = pre_exi;
   2717 			} else {
   2718 				VN_RELE(vp);
   2719 				return (puterrno4(EACCES));
   2720 			}
   2721 		} else if (pre_tvp) {
   2722 			/* we're done with pre_tvp now. release extra hold */
   2723 			VN_RELE(pre_tvp);
   2724 		}
   2725 
   2726 		cs->exi = exi;
   2727 
   2728 		/*
   2729 		 * Now we do a checkauth4. The reason is that
   2730 		 * this client/user may not have access to the new
   2731 		 * exported file system, and if he does,
   2732 		 * the client/user may be mapped to a different uid.
   2733 		 *
   2734 		 * We start with a new cr, because the checkauth4 done
   2735 		 * in the PUT*FH operation over wrote the cred's uid,
   2736 		 * gid, etc, and we want the real thing before calling
   2737 		 * checkauth4()
   2738 		 */
   2739 		crfree(cs->cr);
   2740 		cs->cr = crdup(cs->basecr);
   2741 
   2742 		if (cs->vp)
   2743 			oldvp = cs->vp;
   2744 		cs->vp = vp;
   2745 		is_newvp = TRUE;
   2746 
   2747 		stat = call_checkauth4(cs, req);
   2748 		if (stat != NFS4_OK) {
   2749 			VN_RELE(cs->vp);
   2750 			cs->vp = oldvp;
   2751 			return (stat);
   2752 		}
   2753 	}
   2754 
   2755 	/*
   2756 	 * After various NFS checks, do a label check on the path
   2757 	 * component. The label on this path should either be the
   2758 	 * global zone's label or a zone's label. We are only
   2759 	 * interested in the zone's label because exported files
   2760 	 * in global zone is accessible (though read-only) to
   2761 	 * clients. The exportability/visibility check is already
   2762 	 * done before reaching this code.
   2763 	 */
   2764 	if (is_system_labeled()) {
   2765 		bslabel_t *clabel;
   2766 
   2767 		ASSERT(req->rq_label != NULL);
   2768 		clabel = req->rq_label;
   2769 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
   2770 		    "got client label from request(1)", struct svc_req *, req);
   2771 
   2772 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   2773 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
   2774 			    cs->exi)) {
   2775 				error = EACCES;
   2776 				goto err_out;
   2777 			}
   2778 		} else {
   2779 			/*
   2780 			 * We grant access to admin_low label clients
   2781 			 * only if the client is trusted, i.e. also
   2782 			 * running Solaris Trusted Extension.
   2783 			 */
   2784 			struct sockaddr	*ca;
   2785 			int		addr_type;
   2786 			void		*ipaddr;
   2787 			tsol_tpc_t	*tp;
   2788 
   2789 			ca = (struct sockaddr *)svc_getrpccaller(
   2790 			    req->rq_xprt)->buf;
   2791 			if (ca->sa_family == AF_INET) {
   2792 				addr_type = IPV4_VERSION;
   2793 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
   2794 			} else if (ca->sa_family == AF_INET6) {
   2795 				addr_type = IPV6_VERSION;
   2796 				ipaddr = &((struct sockaddr_in6 *)
   2797 				    ca)->sin6_addr;
   2798 			}
   2799 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
   2800 			if (tp == NULL || tp->tpc_tp.tp_doi !=
   2801 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
   2802 			    SUN_CIPSO) {
   2803 				if (tp != NULL)
   2804 					TPC_RELE(tp);
   2805 				error = EACCES;
   2806 				goto err_out;
   2807 			}
   2808 			TPC_RELE(tp);
   2809 		}
   2810 	}
   2811 
   2812 	error = makefh4(&cs->fh, vp, cs->exi);
   2813 
   2814 err_out:
   2815 	if (error) {
   2816 		if (is_newvp) {
   2817 			VN_RELE(cs->vp);
   2818 			cs->vp = oldvp;
   2819 		} else
   2820 			VN_RELE(vp);
   2821 		return (puterrno4(error));
   2822 	}
   2823 
   2824 	if (!is_newvp) {
   2825 		if (cs->vp)
   2826 			VN_RELE(cs->vp);
   2827 		cs->vp = vp;
   2828 	} else if (oldvp)
   2829 		VN_RELE(oldvp);
   2830 
   2831 	/*
   2832 	 * if did lookup on attrdir and didn't lookup .., set named
   2833 	 * attr fh flag
   2834 	 */
   2835 	if (attrdir && ! dotdot)
   2836 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
   2837 
   2838 	/* Assume false for now, open proc will set this */
   2839 	cs->mandlock = FALSE;
   2840 
   2841 	return (NFS4_OK);
   2842 }
   2843 
   2844 /* ARGSUSED */
   2845 static void
   2846 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2847     struct compound_state *cs)
   2848 {
   2849 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
   2850 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
   2851 	char *nm;
   2852 	uint_t len;
   2853 	struct sockaddr *ca;
   2854 	char *name = NULL;
   2855 
   2856 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
   2857 	    LOOKUP4args *, args);
   2858 
   2859 	if (cs->vp == NULL) {
   2860 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2861 		goto out;
   2862 	}
   2863 
   2864 	if (cs->vp->v_type == VLNK) {
   2865 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
   2866 		goto out;
   2867 	}
   2868 
   2869 	if (cs->vp->v_type != VDIR) {
   2870 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2871 		goto out;
   2872 	}
   2873 
   2874 	if (!utf8_dir_verify(&args->objname)) {
   2875 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2876 		goto out;
   2877 	}
   2878 
   2879 	nm = utf8_to_str(&args->objname, &len, NULL);
   2880 	if (nm == NULL) {
   2881 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2882 		goto out;
   2883 	}
   2884 
   2885 	if (len > MAXNAMELEN) {
   2886 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2887 		kmem_free(nm, len);
   2888 		goto out;
   2889 	}
   2890 
   2891 	/* If necessary, convert to UTF-8 for illbehaved clients */
   2892 
   2893 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   2894 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   2895 	    MAXPATHLEN  + 1);
   2896 
   2897 	if (name == NULL) {
   2898 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2899 		kmem_free(nm, len);
   2900 		goto out;
   2901 	}
   2902 
   2903 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, len, req, cs);
   2904 
   2905 	if (name != nm)
   2906 		kmem_free(name, MAXPATHLEN + 1);
   2907 	kmem_free(nm, len);
   2908 
   2909 out:
   2910 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
   2911 	    LOOKUP4res *, resp);
   2912 }
   2913 
   2914 /* ARGSUSED */
   2915 static void
   2916 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2917     struct compound_state *cs)
   2918 {
   2919 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
   2920 
   2921 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
   2922 
   2923 	if (cs->vp == NULL) {
   2924 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2925 		goto out;
   2926 	}
   2927 
   2928 	if (cs->vp->v_type != VDIR) {
   2929 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2930 		goto out;
   2931 	}
   2932 
   2933 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
   2934 
   2935 	/*
   2936 	 * From NFSV4 Specification, LOOKUPP should not check for
   2937 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
   2938 	 */
   2939 	if (resp->status == NFS4ERR_WRONGSEC) {
   2940 		*cs->statusp = resp->status = NFS4_OK;
   2941 	}
   2942 
   2943 out:
   2944 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
   2945 	    LOOKUPP4res *, resp);
   2946 }
   2947 
   2948 
   2949 /*ARGSUSED2*/
   2950 static void
   2951 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2952     struct compound_state *cs)
   2953 {
   2954 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
   2955 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
   2956 	vnode_t		*avp = NULL;
   2957 	int		lookup_flags = LOOKUP_XATTR, error;
   2958 	int		exp_ro = 0;
   2959 
   2960 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
   2961 	    OPENATTR4args *, args);
   2962 
   2963 	if (cs->vp == NULL) {
   2964 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2965 		goto out;
   2966 	}
   2967 
   2968 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
   2969 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
   2970 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
   2971 		goto out;
   2972 	}
   2973 
   2974 	/*
   2975 	 * If file system supports passing ACE mask to VOP_ACCESS then
   2976 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
   2977 	 */
   2978 
   2979 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
   2980 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
   2981 		    V_ACE_MASK, cs->cr, NULL);
   2982 	else
   2983 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
   2984 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
   2985 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
   2986 
   2987 	if (error) {
   2988 		*cs->statusp = resp->status = puterrno4(EACCES);
   2989 		goto out;
   2990 	}
   2991 
   2992 	/*
   2993 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
   2994 	 * the file system is exported read-only -- regardless of
   2995 	 * createdir flag.  Otherwise the attrdir would be created
   2996 	 * (assuming server fs isn't mounted readonly locally).  If
   2997 	 * VOP_LOOKUP returns ENOENT in this case, the error will
   2998 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
   2999 	 * because specfs has no VOP_LOOKUP op, so the macro would
   3000 	 * return ENOSYS.  EINVAL is returned by all (current)
   3001 	 * Solaris file system implementations when any of their
   3002 	 * restrictions are violated (xattr(dir) can't have xattrdir).
   3003 	 * Returning NOTSUPP is more appropriate in this case
   3004 	 * because the object will never be able to have an attrdir.
   3005 	 */
   3006 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
   3007 		lookup_flags |= CREATE_XATTR_DIR;
   3008 
   3009 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
   3010 	    NULL, NULL, NULL);
   3011 
   3012 	if (error) {
   3013 		if (error == ENOENT && args->createdir && exp_ro)
   3014 			*cs->statusp = resp->status = puterrno4(EROFS);
   3015 		else if (error == EINVAL || error == ENOSYS)
   3016 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
   3017 		else
   3018 			*cs->statusp = resp->status = puterrno4(error);
   3019 		goto out;
   3020 	}
   3021 
   3022 	ASSERT(avp->v_flag & V_XATTRDIR);
   3023 
   3024 	error = makefh4(&cs->fh, avp, cs->exi);
   3025 
   3026 	if (error) {
   3027 		VN_RELE(avp);
   3028 		*cs->statusp = resp->status = puterrno4(error);
   3029 		goto out;
   3030 	}
   3031 
   3032 	VN_RELE(cs->vp);
   3033 	cs->vp = avp;
   3034 
   3035 	/*
   3036 	 * There is no requirement for an attrdir fh flag
   3037 	 * because the attrdir has a vnode flag to distinguish
   3038 	 * it from regular (non-xattr) directories.  The
   3039 	 * FH4_ATTRDIR flag is set for future sanity checks.
   3040 	 */
   3041 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
   3042 	*cs->statusp = resp->status = NFS4_OK;
   3043 
   3044 out:
   3045 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
   3046 	    OPENATTR4res *, resp);
   3047 }
   3048 
   3049 static int
   3050 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
   3051     caller_context_t *ct)
   3052 {
   3053 	int error;
   3054 	int i;
   3055 	clock_t delaytime;
   3056 
   3057 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
   3058 
   3059 	/*
   3060 	 * Don't block on mandatory locks. If this routine returns
   3061 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
   3062 	 */
   3063 	uio->uio_fmode = FNONBLOCK;
   3064 
   3065 	for (i = 0; i < rfs4_maxlock_tries; i++) {
   3066 
   3067 
   3068 		if (direction == FREAD) {
   3069 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
   3070 			error = VOP_READ(vp, uio, ioflag, cred, ct);
   3071 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
   3072 		} else {
   3073 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
   3074 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
   3075 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
   3076 		}
   3077 
   3078 		if (error != EAGAIN)
   3079 			break;
   3080 
   3081 		if (i < rfs4_maxlock_tries - 1) {
   3082 			delay(delaytime);
   3083 			delaytime *= 2;
   3084 		}
   3085 	}
   3086 
   3087 	return (error);
   3088 }
   3089 
   3090 /* ARGSUSED */
   3091 static void
   3092 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3093     struct compound_state *cs)
   3094 {
   3095 	READ4args *args = &argop->nfs_argop4_u.opread;
   3096 	READ4res *resp = &resop->nfs_resop4_u.opread;
   3097 	int error;
   3098 	int verror;
   3099 	vnode_t *vp;
   3100 	struct vattr va;
   3101 	struct iovec iov;
   3102 	struct uio uio;
   3103 	u_offset_t offset;
   3104 	bool_t *deleg = &cs->deleg;
   3105 	nfsstat4 stat;
   3106 	int in_crit = 0;
   3107 	mblk_t *mp;
   3108 	int alloc_err = 0;
   3109 	caller_context_t ct;
   3110 
   3111 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
   3112 	    READ4args, args);
   3113 
   3114 	vp = cs->vp;
   3115 	if (vp == NULL) {
   3116 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3117 		goto out;
   3118 	}
   3119 	if (cs->access == CS_ACCESS_DENIED) {
   3120 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3121 		goto out;
   3122 	}
   3123 
   3124 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
   3125 	    deleg, TRUE, &ct)) != NFS4_OK) {
   3126 		*cs->statusp = resp->status = stat;
   3127 		goto out;
   3128 	}
   3129 
   3130 	/*
   3131 	 * Enter the critical region before calling VOP_RWLOCK
   3132 	 * to avoid a deadlock with write requests.
   3133 	 */
   3134 	if (nbl_need_check(vp)) {
   3135 		nbl_start_crit(vp, RW_READER);
   3136 		in_crit = 1;
   3137 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
   3138 		    &ct)) {
   3139 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
   3140 			goto out;
   3141 		}
   3142 	}
   3143 
   3144 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
   3145 	    deleg, TRUE, &ct)) != NFS4_OK) {
   3146 		*cs->statusp = resp->status = stat;
   3147 		goto out;
   3148 	}
   3149 
   3150 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
   3151 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3152 
   3153 	/*
   3154 	 * If we can't get the attributes, then we can't do the
   3155 	 * right access checking.  So, we'll fail the request.
   3156 	 */
   3157 	if (verror) {
   3158 		*cs->statusp = resp->status = puterrno4(verror);
   3159 		goto out;
   3160 	}
   3161 
   3162 	if (vp->v_type != VREG) {
   3163 		*cs->statusp = resp->status =
   3164 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
   3165 		goto out;
   3166 	}
   3167 
   3168 	if (crgetuid(cs->cr) != va.va_uid &&
   3169 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
   3170 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
   3171 		*cs->statusp = resp->status = puterrno4(error);
   3172 		goto out;
   3173 	}
   3174 
   3175 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
   3176 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3177 		goto out;
   3178 	}
   3179 
   3180 	offset = args->offset;
   3181 	if (offset >= va.va_size) {
   3182 		*cs->statusp = resp->status = NFS4_OK;
   3183 		resp->eof = TRUE;
   3184 		resp->data_len = 0;
   3185 		resp->data_val = NULL;
   3186 		resp->mblk = NULL;
   3187 		/* RDMA */
   3188 		resp->wlist = args->wlist;
   3189 		resp->wlist_len = resp->data_len;
   3190 		*cs->statusp = resp->status = NFS4_OK;
   3191 		if (resp->wlist)
   3192 			clist_zero_len(resp->wlist);
   3193 		goto out;
   3194 	}
   3195 
   3196 	if (args->count == 0) {
   3197 		*cs->statusp = resp->status = NFS4_OK;
   3198 		resp->eof = FALSE;
   3199 		resp->data_len = 0;
   3200 		resp->data_val = NULL;
   3201 		resp->mblk = NULL;
   3202 		/* RDMA */
   3203 		resp->wlist = args->wlist;
   3204 		resp->wlist_len = resp->data_len;
   3205 		if (resp->wlist)
   3206 			clist_zero_len(resp->wlist);
   3207 		goto out;
   3208 	}
   3209 
   3210 	/*
   3211 	 * Do not allocate memory more than maximum allowed
   3212 	 * transfer size
   3213 	 */
   3214 	if (args->count > rfs4_tsize(req))
   3215 		args->count = rfs4_tsize(req);
   3216 
   3217 	/*
   3218 	 * If returning data via RDMA Write, then grab the chunk list. If we
   3219 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
   3220 	 */
   3221 	if (args->wlist) {
   3222 		mp = NULL;
   3223 		(void) rdma_get_wchunk(req, &iov, args->wlist);
   3224 	} else {
   3225 		/*
   3226 		 * mp will contain the data to be sent out in the read reply.
   3227 		 * It will be freed after the reply has been sent. Let's
   3228 		 * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
   3229 		 * the call to xdrmblk_putmblk() never fails. If the first
   3230 		 * alloc of the requested size fails, then decrease the size to
   3231 		 * something more reasonable and wait for the allocation to
   3232 		 * occur.
   3233 		 */
   3234 		mp = allocb(RNDUP(args->count), BPRI_MED);
   3235 		if (mp == NULL) {
   3236 			if (args->count > MAXBSIZE)
   3237 				args->count = MAXBSIZE;
   3238 			mp = allocb_wait(RNDUP(args->count), BPRI_MED,
   3239 			    STR_NOSIG, &alloc_err);
   3240 		}
   3241 		ASSERT(mp != NULL);
   3242 		ASSERT(alloc_err == 0);
   3243 
   3244 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
   3245 		iov.iov_len = args->count;
   3246 	}
   3247 
   3248 	uio.uio_iov = &iov;
   3249 	uio.uio_iovcnt = 1;
   3250 	uio.uio_segflg = UIO_SYSSPACE;
   3251 	uio.uio_extflg = UIO_COPY_CACHED;
   3252 	uio.uio_loffset = args->offset;
   3253 	uio.uio_resid = args->count;
   3254 
   3255 	error = do_io(FREAD, vp, &uio, 0, cs->cr, &ct);
   3256 
   3257 	va.va_mask = AT_SIZE;
   3258 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3259 
   3260 	if (error) {
   3261 		freeb(mp);
   3262 		*cs->statusp = resp->status = puterrno4(error);
   3263 		goto out;
   3264 	}
   3265 
   3266 	*cs->statusp = resp->status = NFS4_OK;
   3267 
   3268 	ASSERT(uio.uio_resid >= 0);
   3269 	resp->data_len = args->count - uio.uio_resid;
   3270 	if (mp) {
   3271 		resp->data_val = (char *)mp->b_datap->db_base;
   3272 	} else {
   3273 		resp->data_val = (caddr_t)iov.iov_base;
   3274 	}
   3275 	resp->mblk = mp;
   3276 
   3277 	if (!verror && offset + resp->data_len == va.va_size)
   3278 		resp->eof = TRUE;
   3279 	else
   3280 		resp->eof = FALSE;
   3281 
   3282 	if (args->wlist) {
   3283 		if (!rdma_setup_read_data4(args, resp)) {
   3284 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   3285 		}
   3286 	} else {
   3287 		resp->wlist = NULL;
   3288 	}
   3289 
   3290 out:
   3291 	if (in_crit)
   3292 		nbl_end_crit(vp);
   3293 
   3294 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
   3295 	    READ4res *, resp);
   3296 }
   3297 
   3298 static void
   3299 rfs4_op_read_free(nfs_resop4 *resop)
   3300 {
   3301 	READ4res	*resp = &resop->nfs_resop4_u.opread;
   3302 
   3303 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
   3304 		freeb(resp->mblk);
   3305 		resp->mblk = NULL;
   3306 		resp->data_val = NULL;
   3307 		resp->data_len = 0;
   3308 	}
   3309 }
   3310 
   3311 static void
   3312 rfs4_op_readdir_free(nfs_resop4 * resop)
   3313 {
   3314 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
   3315 
   3316 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
   3317 		freeb(resp->mblk);
   3318 		resp->mblk = NULL;
   3319 		resp->data_len = 0;
   3320 	}
   3321 }
   3322 
   3323 
   3324 /* ARGSUSED */
   3325 static void
   3326 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   3327     struct compound_state *cs)
   3328 {
   3329 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
   3330 	int		error;
   3331 	vnode_t		*vp;
   3332 	struct exportinfo *exi, *sav_exi;
   3333 	nfs_fh4_fmt_t	*fh_fmtp;
   3334 
   3335 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
   3336 
   3337 	if (cs->vp) {
   3338 		VN_RELE(cs->vp);
   3339 		cs->vp = NULL;
   3340 	}
   3341 
   3342 	if (cs->cr)
   3343 		crfree(cs->cr);
   3344 
   3345 	cs->cr = crdup(cs->basecr);
   3346 
   3347 	vp = exi_public->exi_vp;
   3348 	if (vp == NULL) {
   3349 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   3350 		goto out;
   3351 	}
   3352 
   3353 	error = makefh4(&cs->fh, vp, exi_public);
   3354 	if (error != 0) {
   3355 		*cs->statusp = resp->status = puterrno4(error);
   3356 		goto out;
   3357 	}
   3358 	sav_exi = cs->exi;
   3359 	if (exi_public == exi_root) {
   3360 		/*
   3361 		 * No filesystem is actually shared public, so we default
   3362 		 * to exi_root. In this case, we must check whether root
   3363 		 * is exported.
   3364 		 */
   3365 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
   3366 
   3367 		/*
   3368 		 * if root filesystem is exported, the exportinfo struct that we
   3369 		 * should use is what checkexport4 returns, because root_exi is
   3370 		 * actually a mostly empty struct.
   3371 		 */
   3372 		exi = checkexport4(&fh_fmtp->fh4_fsid,
   3373 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
   3374 		cs->exi = ((exi != NULL) ? exi : exi_public);
   3375 	} else {
   3376 		/*
   3377 		 * it's a properly shared filesystem
   3378 		 */
   3379 		cs->exi = exi_public;
   3380 	}
   3381 
   3382 	if (is_system_labeled()) {
   3383 		bslabel_t *clabel;
   3384 
   3385 		ASSERT(req->rq_label != NULL);
   3386 		clabel = req->rq_label;
   3387 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
   3388 		    "got client label from request(1)",
   3389 		    struct svc_req *, req);
   3390 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3391 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
   3392 			    cs->exi)) {
   3393 				*cs->statusp = resp->status =
   3394 				    NFS4ERR_SERVERFAULT;
   3395 				goto out;
   3396 			}
   3397 		}
   3398 	}
   3399 
   3400 	VN_HOLD(vp);
   3401 	cs->vp = vp;
   3402 
   3403 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3404 		VN_RELE(cs->vp);
   3405 		cs->vp = NULL;
   3406 		cs->exi = sav_exi;
   3407 		goto out;
   3408 	}
   3409 
   3410 	*cs->statusp = resp->status = NFS4_OK;
   3411 out:
   3412 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
   3413 	    PUTPUBFH4res *, resp);
   3414 }
   3415 
   3416 /*
   3417  * XXX - issue with put*fh operations. Suppose /export/home is exported.
   3418  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
   3419  * or joe have restrictive search permissions, then we shouldn't let
   3420  * the client get a file handle. This is easy to enforce. However, we
   3421  * don't know what security flavor should be used until we resolve the
   3422  * path name. Another complication is uid mapping. If root is
   3423  * the user, then it will be mapped to the anonymous user by default,
   3424  * but we won't know that till we've resolved the path name. And we won't
   3425  * know what the anonymous user is.
   3426  * Luckily, SECINFO is specified to take a full filename.
   3427  * So what we will have to in rfs4_op_lookup is check that flavor of
   3428  * the target object matches that of the request, and if root was the
   3429  * caller, check for the root= and anon= options, and if necessary,
   3430  * repeat the lookup using the right cred_t. But that's not done yet.
   3431  */
   3432 /* ARGSUSED */
   3433 static void
   3434 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3435     struct compound_state *cs)
   3436 {
   3437 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
   3438 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
   3439 	nfs_fh4_fmt_t *fh_fmtp;
   3440 
   3441 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
   3442 	    PUTFH4args *, args);
   3443 
   3444 	if (cs->vp) {
   3445 		VN_RELE(cs->vp);
   3446 		cs->vp = NULL;
   3447 	}
   3448 
   3449 	if (cs->cr) {
   3450 		crfree(cs->cr);
   3451 		cs->cr = NULL;
   3452 	}
   3453 
   3454 
   3455 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
   3456 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
   3457 		goto out;
   3458 	}
   3459 
   3460 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
   3461 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
   3462 	    NULL);
   3463 
   3464 	if (cs->exi == NULL) {
   3465 		*cs->statusp = resp->status = NFS4ERR_STALE;
   3466 		goto out;
   3467 	}
   3468 
   3469 	cs->cr = crdup(cs->basecr);
   3470 
   3471 	ASSERT(cs->cr != NULL);
   3472 
   3473 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
   3474 		*cs->statusp = resp->status;
   3475 		goto out;
   3476 	}
   3477 
   3478 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3479 		VN_RELE(cs->vp);
   3480 		cs->vp = NULL;
   3481 		goto out;
   3482 	}
   3483 
   3484 	nfs_fh4_copy(&args->object, &cs->fh);
   3485 	*cs->statusp = resp->status = NFS4_OK;
   3486 	cs->deleg = FALSE;
   3487 
   3488 out:
   3489 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
   3490 	    PUTFH4res *, resp);
   3491 }
   3492 
   3493 /* ARGSUSED */
   3494 static void
   3495 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3496     struct compound_state *cs)
   3497 {
   3498 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
   3499 	int error;
   3500 	fid_t fid;
   3501 	struct exportinfo *exi, *sav_exi;
   3502 
   3503 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
   3504 
   3505 	if (cs->vp) {
   3506 		VN_RELE(cs->vp);
   3507 		cs->vp = NULL;
   3508 	}
   3509 
   3510 	if (cs->cr)
   3511 		crfree(cs->cr);
   3512 
   3513 	cs->cr = crdup(cs->basecr);
   3514 
   3515 	/*
   3516 	 * Using rootdir, the system root vnode,
   3517 	 * get its fid.
   3518 	 */
   3519 	bzero(&fid, sizeof (fid));
   3520 	fid.fid_len = MAXFIDSZ;
   3521 	error = vop_fid_pseudo(rootdir, &fid);
   3522 	if (error != 0) {
   3523 		*cs->statusp = resp->status = puterrno4(error);
   3524 		goto out;
   3525 	}
   3526 
   3527 	/*
   3528 	 * Then use the root fsid & fid it to find out if it's exported
   3529 	 *
   3530 	 * If the server root isn't exported directly, then
   3531 	 * it should at least be a pseudo export based on
   3532 	 * one or more exports further down in the server's
   3533 	 * file tree.
   3534 	 */
   3535 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
   3536 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
   3537 		NFS4_DEBUG(rfs4_debug,
   3538 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
   3539 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   3540 		goto out;
   3541 	}
   3542 
   3543 	/*
   3544 	 * Now make a filehandle based on the root
   3545 	 * export and root vnode.
   3546 	 */
   3547 	error = makefh4(&cs->fh, rootdir, exi);
   3548 	if (error != 0) {
   3549 		*cs->statusp = resp->status = puterrno4(error);
   3550 		goto out;
   3551 	}
   3552 
   3553 	sav_exi = cs->exi;
   3554 	cs->exi = exi;
   3555 
   3556 	VN_HOLD(rootdir);
   3557 	cs->vp = rootdir;
   3558 
   3559 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3560 		VN_RELE(rootdir);
   3561 		cs->vp = NULL;
   3562 		cs->exi = sav_exi;
   3563 		goto out;
   3564 	}
   3565 
   3566 	*cs->statusp = resp->status = NFS4_OK;
   3567 	cs->deleg = FALSE;
   3568 out:
   3569 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
   3570 	    PUTROOTFH4res *, resp);
   3571 }
   3572 
   3573 /*
   3574  * A directory entry is a valid nfsv4 entry if
   3575  * - it has a non-zero ino
   3576  * - it is not a dot or dotdot name
   3577  * - it is visible in a pseudo export or in a real export that can
   3578  *   only have a limited view.
   3579  */
   3580 static bool_t
   3581 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
   3582     int *expseudo, int check_visible)
   3583 {
   3584 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
   3585 		*expseudo = 0;
   3586 		return (FALSE);
   3587 	}
   3588 
   3589 	if (! check_visible) {
   3590 		*expseudo = 0;
   3591 		return (TRUE);
   3592 	}
   3593 
   3594 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
   3595 }
   3596 
   3597 /*
   3598  * set_rdattr_params sets up the variables used to manage what information
   3599  * to get for each directory entry.
   3600  */
   3601 static nfsstat4
   3602 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
   3603     bitmap4 attrs, bool_t *need_to_lookup)
   3604 {
   3605 	uint_t	va_mask;
   3606 	nfsstat4 status;
   3607 	bitmap4 objbits;
   3608 
   3609 	status = bitmap4_to_attrmask(attrs, sargp);
   3610 	if (status != NFS4_OK) {
   3611 		/*
   3612 		 * could not even figure attr mask
   3613 		 */
   3614 		return (status);
   3615 	}
   3616 	va_mask = sargp->vap->va_mask;
   3617 
   3618 	/*
   3619 	 * dirent's d_ino is always correct value for mounted_on_fileid.
   3620 	 * mntdfid_set is set once here, but mounted_on_fileid is
   3621 	 * set in main dirent processing loop for each dirent.
   3622 	 * The mntdfid_set is a simple optimization that lets the
   3623 	 * server attr code avoid work when caller is readdir.
   3624 	 */
   3625 	sargp->mntdfid_set = TRUE;
   3626 
   3627 	/*
   3628 	 * Lookup entry only if client asked for any of the following:
   3629 	 * a) vattr attrs
   3630 	 * b) vfs attrs
   3631 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
   3632 	 *    other than mounted_on_fileid (which we can take from dirent)
   3633 	 */
   3634 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
   3635 
   3636 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
   3637 		*need_to_lookup = TRUE;
   3638 	else
   3639 		*need_to_lookup = FALSE;
   3640 
   3641 	if (sargp->sbp == NULL)
   3642 		return (NFS4_OK);
   3643 
   3644 	/*
   3645 	 * If filesystem attrs are requested, get them now from the
   3646 	 * directory vp, as most entries will have same filesystem. The only
   3647 	 * exception are mounted over entries but we handle
   3648 	 * those as we go (XXX mounted over detection not yet implemented).
   3649 	 */
   3650 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
   3651 	status = bitmap4_get_sysattrs(sargp);
   3652 	sargp->vap->va_mask = va_mask;
   3653 
   3654 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
   3655 		/*
   3656 		 * Failed to get filesystem attributes.
   3657 		 * Return a rdattr_error for each entry, but don't fail.
   3658 		 * However, don't get any obj-dependent attrs.
   3659 		 */
   3660 		sargp->rdattr_error = status;	/* for rdattr_error */
   3661 		*need_to_lookup = FALSE;
   3662 		/*
   3663 		 * At least get fileid for regular readdir output
   3664 		 */
   3665 		sargp->vap->va_mask &= AT_NODEID;
   3666 		status = NFS4_OK;
   3667 	}
   3668 
   3669 	return (status);
   3670 }
   3671 
   3672 /*
   3673  * readlink: args: CURRENT_FH.
   3674  *	res: status. If success - CURRENT_FH unchanged, return linktext.
   3675  */
   3676 
   3677 /* ARGSUSED */
   3678 static void
   3679 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3680     struct compound_state *cs)
   3681 {
   3682 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3683 	int error;
   3684 	vnode_t *vp;
   3685 	struct iovec iov;
   3686 	struct vattr va;
   3687 	struct uio uio;
   3688 	char *data;
   3689 	struct sockaddr *ca;
   3690 	char *name = NULL;
   3691 
   3692 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
   3693 
   3694 	/* CURRENT_FH: directory */
   3695 	vp = cs->vp;
   3696 	if (vp == NULL) {
   3697 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3698 		goto out;
   3699 	}
   3700 
   3701 	if (cs->access == CS_ACCESS_DENIED) {
   3702 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3703 		goto out;
   3704 	}
   3705 
   3706 	if (vp->v_type == VDIR) {
   3707 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   3708 		goto out;
   3709 	}
   3710 
   3711 	if (vp->v_type != VLNK) {
   3712 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   3713 		goto out;
   3714 	}
   3715 
   3716 	va.va_mask = AT_MODE;
   3717 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
   3718 	if (error) {
   3719 		*cs->statusp = resp->status = puterrno4(error);
   3720 		goto out;
   3721 	}
   3722 
   3723 	if (MANDLOCK(vp, va.va_mode)) {
   3724 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3725 		goto out;
   3726 	}
   3727 
   3728 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
   3729 
   3730 	iov.iov_base = data;
   3731 	iov.iov_len = MAXPATHLEN;
   3732 	uio.uio_iov = &iov;
   3733 	uio.uio_iovcnt = 1;
   3734 	uio.uio_segflg = UIO_SYSSPACE;
   3735 	uio.uio_extflg = UIO_COPY_CACHED;
   3736 	uio.uio_loffset = 0;
   3737 	uio.uio_resid = MAXPATHLEN;
   3738 
   3739 	error = VOP_READLINK(vp, &uio, cs->cr, NULL);
   3740 
   3741 	if (error) {
   3742 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3743 		*cs->statusp = resp->status = puterrno4(error);
   3744 		goto out;
   3745 	}
   3746 
   3747 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
   3748 
   3749 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   3750 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
   3751 	    MAXPATHLEN  + 1);
   3752 
   3753 	if (name == NULL) {
   3754 		/*
   3755 		 * Even though the conversion failed, we return
   3756 		 * something. We just don't translate it.
   3757 		 */
   3758 		name = data;
   3759 	}
   3760 
   3761 	/*
   3762 	 * treat link name as data
   3763 	 */
   3764 	(void) str_to_utf8(name, &resp->link);
   3765 
   3766 	if (name != data)
   3767 		kmem_free(name, MAXPATHLEN + 1);
   3768 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3769 	*cs->statusp = resp->status = NFS4_OK;
   3770 
   3771 out:
   3772 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
   3773 	    READLINK4res *, resp);
   3774 }
   3775 
   3776 static void
   3777 rfs4_op_readlink_free(nfs_resop4 *resop)
   3778 {
   3779 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3780 	utf8string *symlink = &resp->link;
   3781 
   3782 	if (symlink->utf8string_val) {
   3783 		UTF8STRING_FREE(*symlink)
   3784 	}
   3785 }
   3786 
   3787 /*
   3788  * release_lockowner:
   3789  *	Release any state associated with the supplied
   3790  *	lockowner. Note if any lo_state is holding locks we will not
   3791  *	rele that lo_state and thus the lockowner will not be destroyed.
   3792  *	A client using lock after the lock owner stateid has been released
   3793  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
   3794  *	to reissue the lock with new_lock_owner set to TRUE.
   3795  *	args: lock_owner
   3796  *	res:  status
   3797  */
   3798 /* ARGSUSED */
   3799 static void
   3800 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
   3801     struct svc_req *req, struct compound_state *cs)
   3802 {
   3803 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
   3804 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
   3805 	rfs4_lockowner_t *lo;
   3806 	rfs4_openowner_t *oo;
   3807 	rfs4_state_t *sp;
   3808 	rfs4_lo_state_t *lsp;
   3809 	rfs4_client_t *cp;
   3810 	bool_t create = FALSE;
   3811 	locklist_t *llist;
   3812 	sysid_t sysid;
   3813 
   3814 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
   3815 	    cs, RELEASE_LOCKOWNER4args *, ap);
   3816 
   3817 	/* Make sure there is a clientid around for this request */
   3818 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
   3819 
   3820 	if (cp == NULL) {
   3821 		*cs->statusp = resp->status =
   3822 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
   3823 		goto out;
   3824 	}
   3825 	rfs4_client_rele(cp);
   3826 
   3827 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
   3828 	if (lo == NULL) {
   3829 		*cs->statusp = resp->status = NFS4_OK;
   3830 		goto out;
   3831 	}
   3832 	ASSERT(lo->rl_client != NULL);
   3833 
   3834 	/*
   3835 	 * Check for EXPIRED client. If so will reap state with in a lease
   3836 	 * period or on next set_clientid_confirm step
   3837 	 */
   3838 	if (rfs4_lease_expired(lo->rl_client)) {
   3839 		rfs4_lockowner_rele(lo);
   3840 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   3841 		goto out;
   3842 	}
   3843 
   3844 	/*
   3845 	 * If no sysid has been assigned, then no locks exist; just return.
   3846 	 */
   3847 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
   3848 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
   3849 		rfs4_lockowner_rele(lo);
   3850 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   3851 		goto out;
   3852 	}
   3853 
   3854 	sysid = lo->rl_client->rc_sysidt;
   3855 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   3856 
   3857 	/*
   3858 	 * Mark the lockowner invalid.
   3859 	 */
   3860 	rfs4_dbe_hide(lo->rl_dbe);
   3861 
   3862 	/*
   3863 	 * sysid-pid pair should now not be used since the lockowner is
   3864 	 * invalid. If the client were to instantiate the lockowner again
   3865 	 * it would be assigned a new pid. Thus we can get the list of
   3866 	 * current locks.
   3867 	 */
   3868 
   3869 	llist = flk_get_active_locks(sysid, lo->rl_pid);
   3870 	/* If we are still holding locks fail */
   3871 	if (llist != NULL) {
   3872 
   3873 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
   3874 
   3875 		flk_free_locklist(llist);
   3876 		/*
   3877 		 * We need to unhide the lockowner so the client can
   3878 		 * try it again. The bad thing here is if the client
   3879 		 * has a logic error that took it here in the first place
   3880 		 * he probably has lost accounting of the locks that it
   3881 		 * is holding. So we may have dangling state until the
   3882 		 * open owner state is reaped via close. One scenario
   3883 		 * that could possibly occur is that the client has
   3884 		 * sent the unlock request(s) in separate threads
   3885 		 * and has not waited for the replies before sending the
   3886 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
   3887 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
   3888 		 * reissuing the request.
   3889 		 */
   3890 		rfs4_dbe_unhide(lo->rl_dbe);
   3891 		rfs4_lockowner_rele(lo);
   3892 		goto out;
   3893 	}
   3894 
   3895 	/*
   3896 	 * For the corresponding client we need to check each open
   3897 	 * owner for any opens that have lockowner state associated
   3898 	 * with this lockowner.
   3899 	 */
   3900 
   3901 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
   3902 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
   3903 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
   3904 
   3905 		rfs4_dbe_lock(oo->ro_dbe);
   3906 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
   3907 		    sp = list_next(&oo->ro_statelist, sp)) {
   3908 
   3909 			rfs4_dbe_lock(sp->rs_dbe);
   3910 			for (lsp = list_head(&sp->rs_lostatelist);
   3911 			    lsp != NULL;
   3912 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
   3913 				if (lsp->rls_locker == lo) {
   3914 					rfs4_dbe_lock(lsp->rls_dbe);
   3915 					rfs4_dbe_invalidate(lsp->rls_dbe);
   3916 					rfs4_dbe_unlock(lsp->rls_dbe);
   3917 				}
   3918 			}
   3919 			rfs4_dbe_unlock(sp->rs_dbe);
   3920 		}
   3921 		rfs4_dbe_unlock(oo->ro_dbe);
   3922 	}
   3923 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   3924 
   3925 	rfs4_lockowner_rele(lo);
   3926 
   3927 	*cs->statusp = resp->status = NFS4_OK;
   3928 
   3929 out:
   3930 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
   3931 	    cs, RELEASE_LOCKOWNER4res *, resp);
   3932 }
   3933 
   3934 /*
   3935  * short utility function to lookup a file and recall the delegation
   3936  */
   3937 static rfs4_file_t *
   3938 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
   3939     int *lkup_error, cred_t *cr)
   3940 {
   3941 	vnode_t *vp;
   3942 	rfs4_file_t *fp = NULL;
   3943 	bool_t fcreate = FALSE;
   3944 	int error;
   3945 
   3946 	if (vpp)
   3947 		*vpp = NULL;
   3948 
   3949 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
   3950 	    NULL)) == 0) {
   3951 		if (vp->v_type == VREG)
   3952 			fp = rfs4_findfile(vp, NULL, &fcreate);
   3953 		if (vpp)
   3954 			*vpp = vp;
   3955 		else
   3956 			VN_RELE(vp);
   3957 	}
   3958 
   3959 	if (lkup_error)
   3960 		*lkup_error = error;
   3961 
   3962 	return (fp);
   3963 }
   3964 
   3965 /*
   3966  * remove: args: CURRENT_FH: directory; name.
   3967  *	res: status. If success - CURRENT_FH unchanged, return change_info
   3968  *		for directory.
   3969  */
   3970 /* ARGSUSED */
   3971 static void
   3972 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3973     struct compound_state *cs)
   3974 {
   3975 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
   3976 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
   3977 	int error;
   3978 	vnode_t *dvp, *vp;
   3979 	struct vattr bdva, idva, adva;
   3980 	char *nm;
   3981 	uint_t len;
   3982 	rfs4_file_t *fp;
   3983 	int in_crit = 0;
   3984 	bslabel_t *clabel;
   3985 	struct sockaddr *ca;
   3986 	char *name = NULL;
   3987 
   3988 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
   3989 	    REMOVE4args *, args);
   3990 
   3991 	/* CURRENT_FH: directory */
   3992 	dvp = cs->vp;
   3993 	if (dvp == NULL) {
   3994 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3995 		goto out;
   3996 	}
   3997 
   3998 	if (cs->access == CS_ACCESS_DENIED) {
   3999 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4000 		goto out;
   4001 	}
   4002 
   4003 	/*
   4004 	 * If there is an unshared filesystem mounted on this vnode,
   4005 	 * Do not allow to remove anything in this directory.
   4006 	 */
   4007 	if (vn_ismntpt(dvp)) {
   4008 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4009 		goto out;
   4010 	}
   4011 
   4012 	if (dvp->v_type != VDIR) {
   4013 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   4014 		goto out;
   4015 	}
   4016 
   4017 	if (!utf8_dir_verify(&args->target)) {
   4018 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4019 		goto out;
   4020 	}
   4021 
   4022 	/*
   4023 	 * Lookup the file so that we can check if it's a directory
   4024 	 */
   4025 	nm = utf8_to_fn(&args->target, &len, NULL);
   4026 	if (nm == NULL) {
   4027 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4028 		goto out;
   4029 	}
   4030 
   4031 	if (len > MAXNAMELEN) {
   4032 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   4033 		kmem_free(nm, len);
   4034 		goto out;
   4035 	}
   4036 
   4037 	if (rdonly4(cs->exi, cs->vp, req)) {
   4038 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   4039 		kmem_free(nm, len);
   4040 		goto out;
   4041 	}
   4042 
   4043 	/* If necessary, convert to UTF-8 for illbehaved clients */
   4044 
   4045 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   4046 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   4047 	    MAXPATHLEN  + 1);
   4048 
   4049 	if (name == NULL) {
   4050 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4051 		kmem_free(nm, len);
   4052 		goto out;
   4053 	}
   4054 
   4055 	/*
   4056 	 * Lookup the file to determine type and while we are see if
   4057 	 * there is a file struct around and check for delegation.
   4058 	 * We don't need to acquire va_seq before this lookup, if
   4059 	 * it causes an update, cinfo.before will not match, which will
   4060 	 * trigger a cache flush even if atomic is TRUE.
   4061 	 */
   4062 	if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
   4063 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
   4064 		    NULL)) {
   4065 			VN_RELE(vp);
   4066 			rfs4_file_rele(fp);
   4067 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4068 			if (nm != name)
   4069 				kmem_free(name, MAXPATHLEN + 1);
   4070 			kmem_free(nm, len);
   4071 			goto out;
   4072 		}
   4073 	}
   4074 
   4075 	/* Didn't find anything to remove */
   4076 	if (vp == NULL) {
   4077 		*cs->statusp = resp->status = error;
   4078 		if (nm != name)
   4079 			kmem_free(name, MAXPATHLEN + 1);
   4080 		kmem_free(nm, len);
   4081 		goto out;
   4082 	}
   4083 
   4084 	if (nbl_need_check(vp)) {
   4085 		nbl_start_crit(vp, RW_READER);
   4086 		in_crit = 1;
   4087 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
   4088 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4089 			if (nm != name)
   4090 				kmem_free(name, MAXPATHLEN + 1);
   4091 			kmem_free(nm, len);
   4092 			nbl_end_crit(vp);
   4093 			VN_RELE(vp);
   4094 			if (fp) {
   4095 				rfs4_clear_dont_grant(fp);
   4096 				rfs4_file_rele(fp);
   4097 			}
   4098 			goto out;
   4099 		}
   4100 	}
   4101 
   4102 	/* check label before allowing removal */
   4103 	if (is_system_labeled()) {
   4104 		ASSERT(req->rq_label != NULL);
   4105 		clabel = req->rq_label;
   4106 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   4107 		    "got client label from request(1)",
   4108 		    struct svc_req *, req);
   4109 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   4110 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
   4111 			    cs->exi)) {
   4112 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4113 				if (name != nm)
   4114 					kmem_free(name, MAXPATHLEN + 1);
   4115 				kmem_free(nm, len);
   4116 				if (in_crit)
   4117 					nbl_end_crit(vp);
   4118 				VN_RELE(vp);
   4119 				if (fp) {
   4120 					rfs4_clear_dont_grant(fp);
   4121 					rfs4_file_rele(fp);
   4122 				}
   4123 				goto out;
   4124 			}
   4125 		}
   4126 	}
   4127 
   4128 	/* Get dir "before" change value */
   4129 	bdva.va_mask = AT_CTIME|AT_SEQ;
   4130 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
   4131 	if (error) {
   4132 		*cs->statusp = resp->status = puterrno4(error);
   4133 		if (nm != name)
   4134 			kmem_free(name, MAXPATHLEN + 1);
   4135 		kmem_free(nm, len);
   4136 		if (in_crit)
   4137 			nbl_end_crit(vp);
   4138 		VN_RELE(vp);
   4139 		if (fp) {
   4140 			rfs4_clear_dont_grant(fp);
   4141 			rfs4_file_rele(fp);
   4142 		}
   4143 		goto out;
   4144 	}
   4145 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   4146 
   4147 	/* Actually do the REMOVE operation */
   4148 	if (vp->v_type == VDIR) {
   4149 		/*
   4150 		 * Can't remove a directory that has a mounted-on filesystem.
   4151 		 */
   4152 		if (vn_ismntpt(vp)) {
   4153 			error = EACCES;
   4154 		} else {
   4155 			/*
   4156 			 * System V defines rmdir to return EEXIST,
   4157 			 * not * ENOTEMPTY, if the directory is not
   4158 			 * empty.  A System V NFS server needs to map
   4159 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
   4160 			 * transmit over the wire.
   4161 			 */
   4162 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr,
   4163 			    NULL, 0)) == EEXIST)
   4164 				error = ENOTEMPTY;
   4165 		}
   4166 	} else {
   4167 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
   4168 		    fp != NULL) {
   4169 			struct vattr va;
   4170 			vnode_t *tvp;
   4171 
   4172 			rfs4_dbe_lock(fp->rf_dbe);
   4173 			tvp = fp->rf_vp;
   4174 			if (tvp)
   4175 				VN_HOLD(tvp);
   4176 			rfs4_dbe_unlock(fp->rf_dbe);
   4177 
   4178 			if (tvp) {
   4179 				/*
   4180 				 * This is va_seq safe because we are not
   4181 				 * manipulating dvp.
   4182 				 */
   4183 				va.va_mask = AT_NLINK;
   4184 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
   4185 				    va.va_nlink == 0) {
   4186 					/* Remove state on file remove */
   4187 					if (in_crit) {
   4188 						nbl_end_crit(vp);
   4189 						in_crit = 0;
   4190 					}
   4191 					rfs4_close_all_state(fp);
   4192 				}
   4193 				VN_RELE(tvp);
   4194 			}
   4195 		}
   4196 	}
   4197 
   4198 	if (in_crit)
   4199 		nbl_end_crit(vp);
   4200 	VN_RELE(vp);
   4201 
   4202 	if (fp) {
   4203 		rfs4_clear_dont_grant(fp);
   4204 		rfs4_file_rele(fp);
   4205 	}
   4206 	if (nm != name)
   4207 		kmem_free(name, MAXPATHLEN + 1);
   4208 	kmem_free(nm, len);
   4209 
   4210 	if (error) {
   4211 		*cs->statusp = resp->status = puterrno4(error);
   4212 		goto out;
   4213 	}
   4214 
   4215 	/*
   4216 	 * Get the initial "after" sequence number, if it fails, set to zero
   4217 	 */
   4218 	idva.va_mask = AT_SEQ;
   4219 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
   4220 		idva.va_seq = 0;
   4221 
   4222 	/*
   4223 	 * Force modified data and metadata out to stable storage.
   4224 	 */
   4225 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   4226 
   4227 	/*
   4228 	 * Get "after" change value, if it fails, simply return the
   4229 	 * before value.
   4230 	 */
   4231 	adva.va_mask = AT_CTIME|AT_SEQ;
   4232 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
   4233 		adva.va_ctime = bdva.va_ctime;
   4234 		adva.va_seq = 0;
   4235 	}
   4236 
   4237 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   4238 
   4239 	/*
   4240 	 * The cinfo.atomic = TRUE only if we have
   4241 	 * non-zero va_seq's, and it has incremented by exactly one
   4242 	 * during the VOP_REMOVE/RMDIR and it didn't change during
   4243 	 * the VOP_FSYNC.
   4244 	 */
   4245 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   4246 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
   4247 		resp->cinfo.atomic = TRUE;
   4248 	else
   4249 		resp->cinfo.atomic = FALSE;
   4250 
   4251 	*cs->statusp = resp->status = NFS4_OK;
   4252 
   4253 out:
   4254 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
   4255 	    REMOVE4res *, resp);
   4256 }
   4257 
   4258 /*
   4259  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
   4260  *		oldname and newname.
   4261  *	res: status. If success - CURRENT_FH unchanged, return change_info
   4262  *		for both from and target directories.
   4263  */
   4264 /* ARGSUSED */
   4265 static void
   4266 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4267     struct compound_state *cs)
   4268 {
   4269 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
   4270 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
   4271 	int error;
   4272 	vnode_t *odvp;
   4273 	vnode_t *ndvp;
   4274 	vnode_t *srcvp, *targvp;
   4275 	struct vattr obdva, oidva, oadva;
   4276 	struct vattr nbdva, nidva, nadva;
   4277 	char *onm, *nnm;
   4278 	uint_t olen, nlen;
   4279 	rfs4_file_t *fp, *sfp;
   4280 	int in_crit_src, in_crit_targ;
   4281 	int fp_rele_grant_hold, sfp_rele_grant_hold;
   4282 	bslabel_t *clabel;
   4283 	struct sockaddr *ca;
   4284 	char *converted_onm = NULL;
   4285 	char *converted_nnm = NULL;
   4286 
   4287 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
   4288 	    RENAME4args *, args);
   4289 
   4290 	fp = sfp = NULL;
   4291 	srcvp = targvp = NULL;
   4292 	in_crit_src = in_crit_targ = 0;
   4293 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
   4294 
   4295 	/* CURRENT_FH: target directory */
   4296 	ndvp = cs->vp;
   4297 	if (ndvp == NULL) {
   4298 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4299 		goto out;
   4300 	}
   4301 
   4302 	/* SAVED_FH: from directory */
   4303 	odvp = cs->saved_vp;
   4304 	if (odvp == NULL) {
   4305 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4306 		goto out;
   4307 	}
   4308 
   4309 	if (cs->access == CS_ACCESS_DENIED) {
   4310 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4311 		goto out;
   4312 	}
   4313 
   4314 	/*
   4315 	 * If there is an unshared filesystem mounted on this vnode,
   4316 	 * do not allow to rename objects in this directory.
   4317 	 */
   4318 	if (vn_ismntpt(odvp)) {
   4319 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4320 		goto out;
   4321 	}
   4322 
   4323 	/*
   4324 	 * If there is an unshared filesystem mounted on this vnode,
   4325 	 * do not allow to rename to this directory.
   4326 	 */
   4327 	if (vn_ismntpt(ndvp)) {
   4328 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4329 		goto out;
   4330 	}
   4331 
   4332 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
   4333 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   4334 		goto out;
   4335 	}
   4336 
   4337 	if (cs->saved_exi != cs->exi) {
   4338 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   4339 		goto out;
   4340 	}
   4341 
   4342 	if (!utf8_dir_verify(&args->oldname)) {
   4343 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4344 		goto out;
   4345 	}
   4346 
   4347 	if (!utf8_dir_verify(&args->newname)) {
   4348 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4349 		goto out;
   4350 	}
   4351 
   4352 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
   4353 	if (onm == NULL) {
   4354 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4355 		goto out;
   4356 	}
   4357 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   4358 	nlen = MAXPATHLEN + 1;
   4359 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
   4360 	    nlen);
   4361 
   4362 	if (converted_onm == NULL) {
   4363 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4364 		kmem_free(onm, olen);
   4365 		goto out;
   4366 	}
   4367 
   4368 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   4369 	if (nnm == NULL) {
   4370 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4371 		if (onm != converted_onm)
   4372 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4373 		kmem_free(onm, olen);
   4374 		goto out;
   4375 	}
   4376 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
   4377 	    MAXPATHLEN  + 1);
   4378 
   4379 	if (converted_nnm == NULL) {
   4380 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4381 		kmem_free(nnm, nlen);
   4382 		nnm = NULL;
   4383 		if (onm != converted_onm)
   4384 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4385 		kmem_free(onm, olen);
   4386 		goto out;
   4387 	}
   4388 
   4389 
   4390 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
   4391 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   4392 		kmem_free(onm, olen);
   4393 		kmem_free(nnm, nlen);
   4394 		goto out;
   4395 	}
   4396 
   4397 
   4398 	if (rdonly4(cs->exi, cs->vp, req)) {
   4399 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   4400 		if (onm != converted_onm)
   4401 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4402 		kmem_free(onm, olen);
   4403 		if (nnm != converted_nnm)
   4404 			kmem_free(converted_nnm, MAXPATHLEN + 1);
   4405 		kmem_free(nnm, nlen);
   4406 		goto out;
   4407 	}
   4408 
   4409 	/* check label of the target dir */
   4410 	if (is_system_labeled()) {
   4411 		ASSERT(req->rq_label != NULL);
   4412 		clabel = req->rq_label;
   4413 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
   4414 		    "got client label from request(1)",
   4415 		    struct svc_req *, req);
   4416 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   4417 			if (!do_rfs_label_check(clabel, ndvp,
   4418 			    EQUALITY_CHECK, cs->exi)) {
   4419 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4420 				goto err_out;
   4421 			}
   4422 		}
   4423 	}
   4424 
   4425 	/*
   4426 	 * Is the source a file and have a delegation?
   4427 	 * We don't need to acquire va_seq before these lookups, if
   4428 	 * it causes an update, cinfo.before will not match, which will
   4429 	 * trigger a cache flush even if atomic is TRUE.
   4430 	 */
   4431 	if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
   4432 	    &error, cs->cr)) {
   4433 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
   4434 		    NULL)) {
   4435 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4436 			goto err_out;
   4437 		}
   4438 	}
   4439 
   4440 	if (srcvp == NULL) {
   4441 		*cs->statusp = resp->status = puterrno4(error);
   4442 		if (onm != converted_onm)
   4443 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4444 		kmem_free(onm, olen);
   4445 		if (nnm != converted_nnm)
   4446 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4447 		kmem_free(nnm, nlen);
   4448 		goto out;
   4449 	}
   4450 
   4451 	sfp_rele_grant_hold = 1;
   4452 
   4453 	/* Does the destination exist and a file and have a delegation? */
   4454 	if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
   4455 	    NULL, cs->cr)) {
   4456 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
   4457 		    NULL)) {
   4458 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4459 			goto err_out;
   4460 		}
   4461 	}
   4462 	fp_rele_grant_hold = 1;
   4463 
   4464 
   4465 	/* Check for NBMAND lock on both source and target */
   4466 	if (nbl_need_check(srcvp)) {
   4467 		nbl_start_crit(srcvp, RW_READER);
   4468 		in_crit_src = 1;
   4469 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
   4470 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4471 			goto err_out;
   4472 		}
   4473 	}
   4474 
   4475 	if (targvp && nbl_need_check(targvp)) {
   4476 		nbl_start_crit(targvp, RW_READER);
   4477 		in_crit_targ = 1;
   4478 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
   4479 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4480 			goto err_out;
   4481 		}
   4482 	}
   4483 
   4484 	/* Get source "before" change value */
   4485 	obdva.va_mask = AT_CTIME|AT_SEQ;
   4486 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
   4487 	if (!error) {
   4488 		nbdva.va_mask = AT_CTIME|AT_SEQ;
   4489 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
   4490 	}
   4491 	if (error) {
   4492 		*cs->statusp = resp->status = puterrno4(error);
   4493 		goto err_out;
   4494 	}
   4495 
   4496 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
   4497 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
   4498 
   4499 	if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
   4500 	    cs->cr, NULL, 0)) == 0 && fp != NULL) {
   4501 		struct vattr va;
   4502 		vnode_t *tvp;
   4503 
   4504 		rfs4_dbe_lock(fp->rf_dbe);
   4505 		tvp = fp->rf_vp;
   4506 		if (tvp)
   4507 			VN_HOLD(tvp);
   4508 		rfs4_dbe_unlock(fp->rf_dbe);
   4509 
   4510 		if (tvp) {
   4511 			va.va_mask = AT_NLINK;
   4512 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
   4513 			    va.va_nlink == 0) {
   4514 				/* The file is gone and so should the state */
   4515 				if (in_crit_targ) {
   4516 					nbl_end_crit(targvp);
   4517 					in_crit_targ = 0;
   4518 				}
   4519 				rfs4_close_all_state(fp);
   4520 			}
   4521 			VN_RELE(tvp);
   4522 		}
   4523 	}
   4524 	if (error == 0)
   4525 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
   4526 
   4527 	if (in_crit_src)
   4528 		nbl_end_crit(srcvp);
   4529 	if (srcvp)
   4530 		VN_RELE(srcvp);
   4531 	if (in_crit_targ)
   4532 		nbl_end_crit(targvp);
   4533 	if (targvp)
   4534 		VN_RELE(targvp);
   4535 
   4536 	if (sfp) {
   4537 		rfs4_clear_dont_grant(sfp);
   4538 		rfs4_file_rele(sfp);
   4539 	}
   4540 	if (fp) {
   4541 		rfs4_clear_dont_grant(fp);
   4542 		rfs4_file_rele(fp);
   4543 	}
   4544 
   4545 	if (converted_onm != onm)
   4546 		kmem_free(converted_onm, MAXPATHLEN + 1);
   4547 	kmem_free(onm, olen);
   4548 	if (converted_nnm != nnm)
   4549 		kmem_free(converted_nnm, MAXPATHLEN + 1);
   4550 	kmem_free(nnm, nlen);
   4551 
   4552 	/*
   4553 	 * Get the initial "after" sequence number, if it fails, set to zero
   4554 	 */
   4555 	oidva.va_mask = AT_SEQ;
   4556 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
   4557 		oidva.va_seq = 0;
   4558 
   4559 	nidva.va_mask = AT_SEQ;
   4560 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
   4561 		nidva.va_seq = 0;
   4562 
   4563 	/*
   4564 	 * Force modified data and metadata out to stable storage.
   4565 	 */
   4566 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
   4567 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
   4568 
   4569 	if (error) {
   4570 		*cs->statusp = resp->status = puterrno4(error);
   4571 		goto out;
   4572 	}
   4573 
   4574 	/*
   4575 	 * Get "after" change values, if it fails, simply return the
   4576 	 * before value.
   4577 	 */
   4578 	oadva.va_mask = AT_CTIME|AT_SEQ;
   4579 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
   4580 		oadva.va_ctime = obdva.va_ctime;
   4581 		oadva.va_seq = 0;
   4582 	}
   4583 
   4584 	nadva.va_mask = AT_CTIME|AT_SEQ;
   4585 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
   4586 		nadva.va_ctime = nbdva.va_ctime;
   4587 		nadva.va_seq = 0;
   4588 	}
   4589 
   4590 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
   4591 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
   4592 
   4593 	/*
   4594 	 * The cinfo.atomic = TRUE only if we have
   4595 	 * non-zero va_seq's, and it has incremented by exactly one
   4596 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
   4597 	 */
   4598 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
   4599 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
   4600 		resp->source_cinfo.atomic = TRUE;
   4601 	else
   4602 		resp->source_cinfo.atomic = FALSE;
   4603 
   4604 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
   4605 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
   4606 		resp->target_cinfo.atomic = TRUE;
   4607 	else
   4608 		resp->target_cinfo.atomic = FALSE;
   4609 
   4610 #ifdef	VOLATILE_FH_TEST
   4611 	{
   4612 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
   4613 
   4614 	/*
   4615 	 * Add the renamed file handle to the volatile rename list
   4616 	 */
   4617 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
   4618 		/* file handles may expire on rename */
   4619 		vnode_t *vp;
   4620 
   4621 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   4622 		/*
   4623 		 * Already know that nnm will be a valid string
   4624 		 */
   4625 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
   4626 		    NULL, NULL, NULL);
   4627 		kmem_free(nnm, nlen);
   4628 		if (!error) {
   4629 			add_volrnm_fh(cs->exi, vp);
   4630 			VN_RELE(vp);
   4631 		}
   4632 	}
   4633 	}
   4634 #endif	/* VOLATILE_FH_TEST */
   4635 
   4636 	*cs->statusp = resp->status = NFS4_OK;
   4637 out:
   4638 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
   4639 	    RENAME4res *, resp);
   4640 	return;
   4641 
   4642 err_out:
   4643 	if (onm != converted_onm)
   4644 		kmem_free(converted_onm, MAXPATHLEN + 1);
   4645 	if (onm != NULL)
   4646 		kmem_free(onm, olen);
   4647 	if (nnm != converted_nnm)
   4648 		kmem_free(converted_nnm, MAXPATHLEN + 1);
   4649 	if (nnm != NULL)
   4650 		kmem_free(nnm, nlen);
   4651 
   4652 	if (in_crit_src) nbl_end_crit(srcvp);
   4653 	if (in_crit_targ) nbl_end_crit(targvp);
   4654 	if (targvp) VN_RELE(targvp);
   4655 	if (srcvp) VN_RELE(srcvp);
   4656 	if (sfp) {
   4657 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
   4658 		rfs4_file_rele(sfp);
   4659 	}
   4660 	if (fp) {
   4661 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
   4662 		rfs4_file_rele(fp);
   4663 	}
   4664 
   4665 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
   4666 	    RENAME4res *, resp);
   4667 }
   4668 
   4669 /* ARGSUSED */
   4670 static void
   4671 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4672     struct compound_state *cs)
   4673 {
   4674 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
   4675 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
   4676 	rfs4_client_t *cp;
   4677 
   4678 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
   4679 	    RENEW4args *, args);
   4680 
   4681 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
   4682 		*cs->statusp = resp->status =
   4683 		    rfs4_check_clientid(&args->clientid, 0);
   4684 		goto out;
   4685 	}
   4686 
   4687 	if (rfs4_lease_expired(cp)) {
   4688 		rfs4_client_rele(cp);
   4689 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   4690 		goto out;
   4691 	}
   4692 
   4693 	rfs4_update_lease(cp);
   4694 
   4695 	mutex_enter(cp->rc_cbinfo.cb_lock);
   4696 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
   4697 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
   4698 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
   4699 	} else {
   4700 		*cs->statusp = resp->status = NFS4_OK;
   4701 	}
   4702 	mutex_exit(cp->rc_cbinfo.cb_lock);
   4703 
   4704 	rfs4_client_rele(cp);
   4705 
   4706 out:
   4707 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
   4708 	    RENEW4res *, resp);
   4709 }
   4710 
   4711 /* ARGSUSED */
   4712 static void
   4713 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   4714     struct compound_state *cs)
   4715 {
   4716 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
   4717 
   4718 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
   4719 
   4720 	/* No need to check cs->access - we are not accessing any object */
   4721 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
   4722 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
   4723 		goto out;
   4724 	}
   4725 	if (cs->vp != NULL) {
   4726 		VN_RELE(cs->vp);
   4727 	}
   4728 	cs->vp = cs->saved_vp;
   4729 	cs->saved_vp = NULL;
   4730 	cs->exi = cs->saved_exi;
   4731 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
   4732 	*cs->statusp = resp->status = NFS4_OK;
   4733 	cs->deleg = FALSE;
   4734 
   4735 out:
   4736 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
   4737 	    RESTOREFH4res *, resp);
   4738 }
   4739 
   4740 /* ARGSUSED */
   4741 static void
   4742 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4743     struct compound_state *cs)
   4744 {
   4745 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
   4746 
   4747 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
   4748 
   4749 	/* No need to check cs->access - we are not accessing any object */
   4750 	if (cs->vp == NULL) {
   4751 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4752 		goto out;
   4753 	}
   4754 	if (cs->saved_vp != NULL) {
   4755 		VN_RELE(cs->saved_vp);
   4756 	}
   4757 	cs->saved_vp = cs->vp;
   4758 	VN_HOLD(cs->saved_vp);
   4759 	cs->saved_exi = cs->exi;
   4760 	/*
   4761 	 * since SAVEFH is fairly rare, don't alloc space for its fh
   4762 	 * unless necessary.
   4763 	 */
   4764 	if (cs->saved_fh.nfs_fh4_val == NULL) {
   4765 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
   4766 	}
   4767 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
   4768 	*cs->statusp = resp->status = NFS4_OK;
   4769 
   4770 out:
   4771 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
   4772 	    SAVEFH4res *, resp);
   4773 }
   4774 
   4775 /*
   4776  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
   4777  * return the bitmap of attrs that were set successfully. It is also
   4778  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
   4779  * always be called only after rfs4_do_set_attrs().
   4780  *
   4781  * Verify that the attributes are same as the expected ones. sargp->vap
   4782  * and sargp->sbp contain the input attributes as translated from fattr4.
   4783  *
   4784  * This function verifies only the attrs that correspond to a vattr or
   4785  * vfsstat struct. That is because of the extra step needed to get the
   4786  * corresponding system structs. Other attributes have already been set or
   4787  * verified by do_rfs4_set_attrs.
   4788  *
   4789  * Return 0 if all attrs match, -1 if some don't, error if error processing.
   4790  */
   4791 static int
   4792 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
   4793     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
   4794 {
   4795 	int error, ret_error = 0;
   4796 	int i, k;
   4797 	uint_t sva_mask = sargp->vap->va_mask;
   4798 	uint_t vbit;
   4799 	union nfs4_attr_u *na;
   4800 	uint8_t *amap;
   4801 	bool_t getsb = ntovp->vfsstat;
   4802 
   4803 	if (sva_mask != 0) {
   4804 		/*
   4805 		 * Okay to overwrite sargp->vap because we verify based
   4806 		 * on the incoming values.
   4807 		 */
   4808 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
   4809 		    sargp->cs->cr, NULL);
   4810 		if (ret_error) {
   4811 			if (resp == NULL)
   4812 				return (ret_error);
   4813 			/*
   4814 			 * Must return bitmap of successful attrs
   4815 			 */
   4816 			sva_mask = 0;	/* to prevent checking vap later */
   4817 		} else {
   4818 			/*
   4819 			 * Some file systems clobber va_mask. it is probably
   4820 			 * wrong of them to do so, nonethless we practice
   4821 			 * defensive coding.
   4822 			 * See bug id 4276830.
   4823 			 */
   4824 			sargp->vap->va_mask = sva_mask;
   4825 		}
   4826 	}
   4827 
   4828 	if (getsb) {
   4829 		/*
   4830 		 * Now get the superblock and loop on the bitmap, as there is
   4831 		 * no simple way of translating from superblock to bitmap4.
   4832 		 */
   4833 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
   4834 		if (ret_error) {
   4835 			if (resp == NULL)
   4836 				goto errout;
   4837 			getsb = FALSE;
   4838 		}
   4839 	}
   4840 
   4841 	/*
   4842 	 * Now loop and verify each attribute which getattr returned
   4843 	 * whether it's the same as the input.
   4844 	 */
   4845 	if (resp == NULL && !getsb && (sva_mask == 0))
   4846 		goto errout;
   4847 
   4848 	na = ntovp->na;
   4849 	amap = ntovp->amap;
   4850 	k = 0;
   4851 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
   4852 		k = *amap;
   4853 		ASSERT(nfs4_ntov_map[k].nval == k);
   4854 		vbit = nfs4_ntov_map[k].vbit;
   4855 
   4856 		/*
   4857 		 * If vattr attribute but VOP_GETATTR failed, or it's
   4858 		 * superblock attribute but VFS_STATVFS failed, skip
   4859 		 */
   4860 		if (vbit) {
   4861 			if ((vbit & sva_mask) == 0)
   4862 				continue;
   4863 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
   4864 			continue;
   4865 		}
   4866 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
   4867 		if (resp != NULL) {
   4868 			if (error)
   4869 				ret_error = -1;	/* not all match */
   4870 			else	/* update response bitmap */
   4871 				*resp |= nfs4_ntov_map[k].fbit;
   4872 			continue;
   4873 		}
   4874 		if (error) {
   4875 			ret_error = -1;	/* not all match */
   4876 			break;
   4877 		}
   4878 	}
   4879 errout:
   4880 	return (ret_error);
   4881 }
   4882 
   4883 /*
   4884  * Decode the attribute to be set/verified. If the attr requires a sys op
   4885  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
   4886  * call the sv_getit function for it, because the sys op hasn't yet been done.
   4887  * Return 0 for success, error code if failed.
   4888  *
   4889  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
   4890  */
   4891 static int
   4892 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
   4893     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
   4894 {
   4895 	int error = 0;
   4896 	bool_t set_later;
   4897 
   4898 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
   4899 
   4900 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
   4901 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
   4902 		/*
   4903 		 * don't verify yet if a vattr or sb dependent attr,
   4904 		 * because we don't have their sys values yet.
   4905 		 * Will be done later.
   4906 		 */
   4907 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
   4908 			/*
   4909 			 * ACLs are a special case, since setting the MODE
   4910 			 * conflicts with setting the ACL.  We delay setting
   4911 			 * the ACL until all other attributes have been set.
   4912 			 * The ACL gets set in do_rfs4_op_setattr().
   4913 			 */
   4914 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
   4915 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
   4916 				    sargp, nap);
   4917 				if (error) {
   4918 					xdr_free(nfs4_ntov_map[k].xfunc,
   4919 					    (caddr_t)nap);
   4920 				}
   4921 			}
   4922 		}
   4923 	} else {
   4924 #ifdef  DEBUG
   4925 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
   4926 		    "decoding attribute %d\n", k);
   4927 #endif
   4928 		error = EINVAL;
   4929 	}
   4930 	if (!error && resp_bval && !set_later) {
   4931 		*resp_bval |= nfs4_ntov_map[k].fbit;
   4932 	}
   4933 
   4934 	return (error);
   4935 }
   4936 
   4937 /*
   4938  * Set vattr based on incoming fattr4 attrs - used by setattr.
   4939  * Set response mask. Ignore any values that are not writable vattr attrs.
   4940  */
   4941 static nfsstat4
   4942 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
   4943     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
   4944     nfs4_attr_cmd_t cmd)
   4945 {
   4946 	int error = 0;
   4947 	int i;
   4948 	char *attrs = fattrp->attrlist4;
   4949 	uint32_t attrslen = fattrp->attrlist4_len;
   4950 	XDR xdr;
   4951 	nfsstat4 status = NFS4_OK;
   4952 	vnode_t *vp = cs->vp;
   4953 	union nfs4_attr_u *na;
   4954 	uint8_t *amap;
   4955 
   4956 #ifndef lint
   4957 	/*
   4958 	 * Make sure that maximum attribute number can be expressed as an
   4959 	 * 8 bit quantity.
   4960 	 */
   4961 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
   4962 #endif
   4963 
   4964 	if (vp == NULL) {
   4965 		if (resp)
   4966 			*resp = 0;
   4967 		return (NFS4ERR_NOFILEHANDLE);
   4968 	}
   4969 	if (cs->access == CS_ACCESS_DENIED) {
   4970 		if (resp)
   4971 			*resp = 0;
   4972 		return (NFS4ERR_ACCESS);
   4973 	}
   4974 
   4975 	sargp->op = cmd;
   4976 	sargp->cs = cs;
   4977 	sargp->flag = 0;	/* may be set later */
   4978 	sargp->vap->va_mask = 0;
   4979 	sargp->rdattr_error = NFS4_OK;
   4980 	sargp->rdattr_error_req = FALSE;
   4981 	/* sargp->sbp is set by the caller */
   4982 
   4983 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
   4984 
   4985 	na = ntovp->na;
   4986 	amap = ntovp->amap;
   4987 
   4988 	/*
   4989 	 * The following loop iterates on the nfs4_ntov_map checking
   4990 	 * if the fbit is set in the requested bitmap.
   4991 	 * If set then we process the arguments using the
   4992 	 * rfs4_fattr4 conversion functions to populate the setattr
   4993 	 * vattr and va_mask. Any settable attrs that are not using vattr
   4994 	 * will be set in this loop.
   4995 	 */
   4996 	for (i = 0; i < nfs4_ntov_map_size; i++) {
   4997 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
   4998 			continue;
   4999 		}
   5000 		/*
   5001 		 * If setattr, must be a writable attr.
   5002 		 * If verify/nverify, must be a readable attr.
   5003 		 */
   5004 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
   5005 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
   5006 			/*
   5007 			 * Client tries to set/verify an
   5008 			 * unsupported attribute, tries to set
   5009 			 * a read only attr or verify a write
   5010 			 * only one - error!
   5011 			 */
   5012 			break;
   5013 		}
   5014 		/*
   5015 		 * Decode the attribute to set/verify
   5016 		 */
   5017 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
   5018 		    &xdr, resp ? resp : NULL, na);
   5019 		if (error)
   5020 			break;
   5021 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
   5022 		na++;
   5023 		(ntovp->attrcnt)++;
   5024 		if (nfs4_ntov_map[i].vfsstat)
   5025 			ntovp->vfsstat = TRUE;
   5026 	}
   5027 
   5028 	if (error != 0)
   5029 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
   5030 		    puterrno4(error));
   5031 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
   5032 	return (status);
   5033 }
   5034 
        /*
         * Worker for the SETATTR operation.
         *
         * Decodes the client's fattr4 with do_rfs4_set_attrs(NFS4ATTR_SETIT),
         * then applies the result to cs->vp in this order:
         *   1. a size change on a regular file, done with VOP_SPACE(F_FREESP)
         *      when the caller's uid equals the file owner's uid;
         *   2. whatever remains in va_mask, with VOP_SETATTR;
         *   3. the ACL, if one was requested.
         *
         * *resp is cleared on entry and on return holds the bitmap of
         * attributes reported as set, restricted to the bits the client
         * asked for.  If a step fails, rfs4_verify_attr() is used to work
         * out which attributes nevertheless took the requested value.
         *
         * stateid is only examined (via rfs4_check_stateid) when the size
         * is being set.
         *
         * Returns an nfsstat4: NFS4_OK, or the status of the failing step.
         */
   5035 static nfsstat4
   5036 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
   5037     stateid4 *stateid)
   5038 {
   5039 	int error = 0;
   5040 	struct nfs4_svgetit_arg sarg;
   5041 	bool_t trunc;
   5042 
   5043 	nfsstat4 status = NFS4_OK;
   5044 	cred_t *cr = cs->cr;
   5045 	vnode_t *vp = cs->vp;
   5046 	struct nfs4_ntov_table ntov;
   5047 	struct statvfs64 sb;
   5048 	struct vattr bva;
   5049 	struct flock64 bf;
   5050 	int in_crit = 0;
   5051 	uint_t saved_mask = 0;
   5052 	caller_context_t ct;
   5053 
   5054 	*resp = 0;
   5055 	sarg.sbp = &sb;
   5056 	nfs4_ntov_table_init(&ntov);
   5057 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
   5058 	    NFS4ATTR_SETIT);
   5059 	if (status != NFS4_OK) {
   5060 		/*
   5061 		 * failed set attrs
   5062 		 */
   5063 		goto done;
   5064 	}
   5065 	if ((sarg.vap->va_mask == 0) &&
   5066 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
   5067 		/*
   5068 		 * no further work to be done
   5069 		 */
   5070 		goto done;
   5071 	}
   5072 
   5073 	/*
   5074 	 * If we got a request to set the ACL and the MODE, only
   5075 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
   5076 	 * to change any other bits, along with setting an ACL,
   5077 	 * gives NFS4ERR_INVAL.
   5078 	 */
   5079 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
   5080 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
   5081 		vattr_t va;
   5082 
   5083 		va.va_mask = AT_MODE;
   5084 		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
   5085 		if (error) {
   5086 			status = puterrno4(error);
   5087 			goto done;
   5088 		}
        		/* XOR exposes the mode bits that would change */
   5089 		if ((sarg.vap->va_mode ^ va.va_mode) &
   5090 		    ~(VSUID | VSGID | VSVTX)) {
   5091 			status = NFS4ERR_INVAL;
   5092 			goto done;
   5093 		}
   5094 	}
   5095 
   5096 	/* Check stateid only if size has been set */
   5097 	if (sarg.vap->va_mask & AT_SIZE) {
   5098 		trunc = (sarg.vap->va_size == 0);
   5099 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
   5100 		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
   5101 		if (status != NFS4_OK)
   5102 			goto done;
   5103 	} else {
        		/*
        		 * No stateid check was done, so fill in the caller
        		 * context by hand; it is passed to the VOP calls below.
        		 */
   5104 		ct.cc_sysid = 0;
   5105 		ct.cc_pid = 0;
   5106 		ct.cc_caller_id = nfs4_srv_caller_id;
   5107 		ct.cc_flags = CC_DONTBLOCK;
   5108 	}
   5109 
   5110 	/* XXX start of possible race with delegations */
   5111 
   5112 	/*
   5113 	 * We need to specially handle size changes because it is
   5114 	 * possible for the client to create a file with read-only
   5115 	 * modes, but with the file opened for writing. If the client
   5116 	 * then tries to set the file size, e.g. ftruncate(3C),
   5117 	 * fcntl(F_FREESP), the normal access checking done in
   5118 	 * VOP_SETATTR would prevent the client from doing it even though
   5119 	 * it should be allowed to do so.  To get around this, we do the
   5120 	 * access checking for ourselves and use VOP_SPACE which doesn't
   5121 	 * do the access checking.
   5122 	 * Also the client should not be allowed to change the file
   5123 	 * size if there is a conflicting non-blocking mandatory lock in
   5124 	 * the region of the change.
   5125 	 */
   5126 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
   5127 		u_offset_t offset;
   5128 		ssize_t length;
   5129 
   5130 		/*
   5131 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
   5132 		 * before returning, sarg.vap->va_mask is used to
   5133 		 * generate the setattr reply bitmap.  We also clear
   5134 		 * AT_SIZE below before calling VOP_SPACE.  For both
   5135 		 * of these cases, the va_mask needs to be saved here
   5136 		 * and restored after calling VOP_SETATTR.
   5137 		 */
   5138 		saved_mask = sarg.vap->va_mask;
   5139 
   5140 		/*
   5141 		 * Check any possible conflict due to NBMAND locks.
   5142 		 * Get into critical region before VOP_GETATTR, so the
   5143 		 * size attribute is valid when checking conflicts.
   5144 		 */
   5145 		if (nbl_need_check(vp)) {
   5146 			nbl_start_crit(vp, RW_READER);
   5147 			in_crit = 1;
   5148 		}
   5149 
   5150 		bva.va_mask = AT_UID|AT_SIZE;
   5151 		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
   5152 			status = puterrno4(error);
   5153 			goto done;
   5154 		}
   5155 
   5156 		if (in_crit) {
        			/*
        			 * The region to check is the span between the
        			 * current size and the requested size.
        			 */
   5157 			if (sarg.vap->va_size < bva.va_size) {
   5158 				offset = sarg.vap->va_size;
   5159 				length = bva.va_size - sarg.vap->va_size;
   5160 			} else {
   5161 				offset = bva.va_size;
   5162 				length = sarg.vap->va_size - bva.va_size;
   5163 			}
   5164 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
   5165 			    &ct)) {
   5166 				status = NFS4ERR_LOCKED;
   5167 				goto done;
   5168 			}
   5169 		}
   5170 
        		/*
        		 * Caller's uid matches the owner: do the size change
        		 * with VOP_SPACE and drop AT_SIZE so that VOP_SETATTR
        		 * below does not repeat it.
        		 */
   5171 		if (crgetuid(cr) == bva.va_uid) {
   5172 			sarg.vap->va_mask &= ~AT_SIZE;
   5173 			bf.l_type = F_WRLCK;
   5174 			bf.l_whence = 0;
   5175 			bf.l_start = (off64_t)sarg.vap->va_size;
   5176 			bf.l_len = 0;
   5177 			bf.l_sysid = 0;
   5178 			bf.l_pid = 0;
   5179 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
   5180 			    (offset_t)sarg.vap->va_size, cr, &ct);
   5181 		}
   5182 	}
   5183 
        	/* Apply what is left in va_mask, unless VOP_SPACE failed. */
   5184 	if (!error && sarg.vap->va_mask != 0)
   5185 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
   5186 
   5187 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
   5188 	if (saved_mask & AT_SIZE)
   5189 		sarg.vap->va_mask |= AT_SIZE;
   5190 
   5191 	/*
   5192 	 * If an ACL was being set, it has been delayed until now,
   5193 	 * in order to set the mode (via the VOP_SETATTR() above) first.
   5194 	 */
   5195 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
   5196 		int i;
   5197 
        		/* find the slot that holds the decoded ACL argument */
   5198 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
   5199 			if (ntov.amap[i] == FATTR4_ACL)
   5200 				break;
   5201 		if (i < NFS4_MAXNUM_ATTRS) {
   5202 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
   5203 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
   5204 			if (error == 0) {
   5205 				*resp |= FATTR4_ACL_MASK;
   5206 			} else if (error == ENOTSUP) {
   5207 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
   5208 				status = NFS4ERR_ATTRNOTSUPP;
   5209 				goto done;
   5210 			}
   5211 		} else {
   5212 			NFS4_DEBUG(rfs4_debug,
   5213 			    (CE_NOTE, "do_rfs4_op_setattr: "
   5214 			    "unable to find ACL in fattr4"));
   5215 			error = EINVAL;
   5216 		}
   5217 	}
   5218 
   5219 	if (error) {
   5220 		/* check if a monitor detected a delegation conflict */
   5221 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
   5222 			status = NFS4ERR_DELAY;
   5223 		else
   5224 			status = puterrno4(error);
   5225 
   5226 		/*
   5227 		 * Set the response bitmap when setattr failed.
   5228 		 * If VOP_SETATTR partially succeeded, test by doing a
   5229 		 * VOP_GETATTR on the object and comparing the data
   5230 		 * to the setattr arguments.
   5231 		 */
   5232 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
   5233 	} else {
   5234 		/*
   5235 		 * Force modified metadata out to stable storage.
   5236 		 */
   5237 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
   5238 		/*
   5239 		 * Set response bitmap
   5240 		 */
   5241 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
   5242 	}
   5243 
   5244 /* Return early and already have a NFSv4 error */
   5245 done:
   5246 	/*
   5247 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
   5248 	 * conversion sets both readable and writeable NFS4 attrs
   5249 	 * for AT_MTIME and AT_ATIME.  The line below masks out
   5250 	 * unrequested attrs from the setattr result bitmap.  This
   5251 	 * is placed after the done: label to catch the ATTRNOTSUP
   5252 	 * case.
   5253 	 */
   5254 	*resp &= fattrp->attrmask;
   5255 
        	/* leave the NBMAND critical region if it was entered above */
   5256 	if (in_crit)
   5257 		nbl_end_crit(vp);
   5258 
   5259 	nfs4_ntov_table_free(&ntov, &sarg);
   5260 
   5261 	return (status);
   5262 }
   5263 
   5264 /* ARGSUSED */
   5265 static void
   5266 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5267     struct compound_state *cs)
   5268 {
   5269 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
   5270 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
   5271 	bslabel_t *clabel;
   5272 
   5273 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
   5274 	    SETATTR4args *, args);
   5275 
   5276 	if (cs->vp == NULL) {
   5277 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5278 		goto out;
   5279 	}
   5280 
   5281 	/*
   5282 	 * If there is an unshared filesystem mounted on this vnode,
   5283 	 * do not allow to setattr on this vnode.
   5284 	 */
   5285 	if (vn_ismntpt(cs->vp)) {
   5286 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5287 		goto out;
   5288 	}
   5289 
   5290 	resp->attrsset = 0;
   5291 
   5292 	if (rdonly4(cs->exi, cs->vp, req)) {
   5293 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   5294 		goto out;
   5295 	}
   5296 
   5297 	/* check label before setting attributes */
   5298 	if (is_system_labeled()) {
   5299 		ASSERT(req->rq_label != NULL);
   5300 		clabel = req->rq_label;
   5301 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
   5302 		    "got client label from request(1)",
   5303 		    struct svc_req *, req);
   5304 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   5305 			if (!do_rfs_label_check(clabel, cs->vp,
   5306 			    EQUALITY_CHECK, cs->exi)) {
   5307 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5308 				goto out;
   5309 			}
   5310 		}
   5311 	}
   5312 
   5313 	*cs->statusp = resp->status =
   5314 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
   5315 	    &args->stateid);
   5316 
   5317 out:
   5318 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
   5319 	    SETATTR4res *, resp);
   5320 }
   5321 
   5322 /* ARGSUSED */
   5323 static void
   5324 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5325     struct compound_state *cs)
   5326 {
   5327 	/*
   5328 	 * verify and nverify are exactly the same, except that nverify
   5329 	 * succeeds when some argument changed, and verify succeeds when
   5330 	 * when none changed.
   5331 	 */
   5332 
   5333 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
   5334 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
   5335 
   5336 	int error;
   5337 	struct nfs4_svgetit_arg sarg;
   5338 	struct statvfs64 sb;
   5339 	struct nfs4_ntov_table ntov;
   5340 
   5341 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
   5342 	    VERIFY4args *, args);
   5343 
   5344 	if (cs->vp == NULL) {
   5345 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5346 		goto out;
   5347 	}
   5348 
   5349 	sarg.sbp = &sb;
   5350 	nfs4_ntov_table_init(&ntov);
   5351 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   5352 	    &sarg, &ntov, NFS4ATTR_VERIT);
   5353 	if (resp->status != NFS4_OK) {
   5354 		/*
   5355 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   5356 		 * so could return -1 for "no match".
   5357 		 */
   5358 		if (resp->status == -1)
   5359 			resp->status = NFS4ERR_NOT_SAME;
   5360 		goto done;
   5361 	}
   5362 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   5363 	switch (error) {
   5364 	case 0:
   5365 		resp->status = NFS4_OK;
   5366 		break;
   5367 	case -1:
   5368 		resp->status = NFS4ERR_NOT_SAME;
   5369 		break;
   5370 	default:
   5371 		resp->status = puterrno4(error);
   5372 		break;
   5373 	}
   5374 done:
   5375 	*cs->statusp = resp->status;
   5376 	nfs4_ntov_table_free(&ntov, &sarg);
   5377 out:
   5378 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
   5379 	    VERIFY4res *, resp);
   5380 }
   5381 
   5382 /* ARGSUSED */
   5383 static void
   5384 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5385     struct compound_state *cs)
   5386 {
   5387 	/*
   5388 	 * verify and nverify are exactly the same, except that nverify
   5389 	 * succeeds when some argument changed, and verify succeeds when
   5390 	 * when none changed.
   5391 	 */
   5392 
   5393 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
   5394 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
   5395 
   5396 	int error;
   5397 	struct nfs4_svgetit_arg sarg;
   5398 	struct statvfs64 sb;
   5399 	struct nfs4_ntov_table ntov;
   5400 
   5401 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
   5402 	    NVERIFY4args *, args);
   5403 
   5404 	if (cs->vp == NULL) {
   5405 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5406 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   5407 		    NVERIFY4res *, resp);
   5408 		return;
   5409 	}
   5410 	sarg.sbp = &sb;
   5411 	nfs4_ntov_table_init(&ntov);
   5412 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   5413 	    &sarg, &ntov, NFS4ATTR_VERIT);
   5414 	if (resp->status != NFS4_OK) {
   5415 		/*
   5416 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   5417 		 * so could return -1 for "no match".
   5418 		 */
   5419 		if (resp->status == -1)
   5420 			resp->status = NFS4_OK;
   5421 		goto done;
   5422 	}
   5423 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   5424 	switch (error) {
   5425 	case 0:
   5426 		resp->status = NFS4ERR_SAME;
   5427 		break;
   5428 	case -1:
   5429 		resp->status = NFS4_OK;
   5430 		break;
   5431 	default:
   5432 		resp->status = puterrno4(error);
   5433 		break;
   5434 	}
   5435 done:
   5436 	*cs->statusp = resp->status;
   5437 	nfs4_ntov_table_free(&ntov, &sarg);
   5438 
   5439 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   5440 	    NVERIFY4res *, resp);
   5441 }
   5442 
   5443 /*
   5444  * XXX - This should live in an NFS header file.
   5445  */
   5446 #define	MAX_IOVECS	12
   5447 
   5448 /* ARGSUSED */
   5449 static void
   5450 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5451     struct compound_state *cs)
   5452 {
   5453 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
   5454 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
   5455 	int error;
   5456 	vnode_t *vp;
   5457 	struct vattr bva;
   5458 	u_offset_t rlimit;
   5459 	struct uio uio;
   5460 	struct iovec iov[MAX_IOVECS];
   5461 	struct iovec *iovp;
   5462 	int iovcnt;
   5463 	int ioflag;
   5464 	cred_t *savecred, *cr;
   5465 	bool_t *deleg = &cs->deleg;
   5466 	nfsstat4 stat;
   5467 	int in_crit = 0;
   5468 	caller_context_t ct;
   5469 
   5470 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
   5471 	    WRITE4args *, args);
   5472 
   5473 	vp = cs->vp;
   5474 	if (vp == NULL) {
   5475 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5476 		goto out;
   5477 	}
   5478 	if (cs->access == CS_ACCESS_DENIED) {
   5479 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5480 		goto out;
   5481 	}
   5482 
   5483 	cr = cs->cr;
   5484 
   5485 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
   5486 	    deleg, TRUE, &ct)) != NFS4_OK) {
   5487 		*cs->statusp = resp->status = stat;
   5488 		goto out;
   5489 	}
   5490 
   5491 	/*
   5492 	 * We have to enter the critical region before calling VOP_RWLOCK
   5493 	 * to avoid a deadlock with ufs.
   5494 	 */
   5495 	if (nbl_need_check(vp)) {
   5496 		nbl_start_crit(vp, RW_READER);
   5497 		in_crit = 1;
   5498 		if (nbl_conflict(vp, NBL_WRITE,
   5499 		    args->offset, args->data_len, 0, &ct)) {
   5500 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
   5501 			goto out;
   5502 		}
   5503 	}
   5504 
   5505 	bva.va_mask = AT_MODE | AT_UID;
   5506 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
   5507 
   5508 	/*
   5509 	 * If we can't get the attributes, then we can't do the
   5510 	 * right access checking.  So, we'll fail the request.
   5511 	 */
   5512 	if (error) {
   5513 		*cs->statusp = resp->status = puterrno4(error);
   5514 		goto out;
   5515 	}
   5516 
   5517 	if (rdonly4(cs->exi, cs->vp, req)) {
   5518 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   5519 		goto out;
   5520 	}
   5521 
   5522 	if (vp->v_type != VREG) {
   5523 		*cs->statusp = resp->status =
   5524 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
   5525 		goto out;
   5526 	}
   5527 
   5528 	if (crgetuid(cr) != bva.va_uid &&
   5529 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
   5530 		*cs->statusp = resp->status = puterrno4(error);
   5531 		goto out;
   5532 	}
   5533 
   5534 	if (MANDLOCK(vp, bva.va_mode)) {
   5535 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5536 		goto out;
   5537 	}
   5538 
   5539 	if (args->data_len == 0) {
   5540 		*cs->statusp = resp->status = NFS4_OK;
   5541 		resp->count = 0;
   5542 		resp->committed = args->stable;
   5543 		resp->writeverf = Write4verf;
   5544 		goto out;
   5545 	}
   5546 
   5547 	if (args->mblk != NULL) {
   5548 		mblk_t *m;
   5549 		uint_t bytes, round_len;
   5550 
   5551 		iovcnt = 0;
   5552 		bytes = 0;
   5553 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
   5554 		for (m = args->mblk;
   5555 		    m != NULL && bytes < round_len;
   5556 		    m = m->b_cont) {
   5557 			iovcnt++;
   5558 			bytes += MBLKL(m);
   5559 		}
   5560 #ifdef DEBUG
   5561 		/* should have ended on an mblk boundary */
   5562 		if (bytes != round_len) {
   5563 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
   5564 			    bytes, round_len, args->data_len);
   5565 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
   5566 			    (void *)args->mblk, (void *)m);
   5567 			ASSERT(bytes == round_len);
   5568 		}
   5569 #endif
   5570 		if (iovcnt <= MAX_IOVECS) {
   5571 			iovp = iov;
   5572 		} else {
   5573 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
   5574 		}
   5575 		mblk_to_iov(args->mblk, iovcnt, iovp);
   5576 	} else if (args->rlist != NULL) {
   5577 		iovcnt = 1;
   5578 		iovp = iov;
   5579 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
   5580 		iovp->iov_len = args->data_len;
   5581 	} else {
   5582 		iovcnt = 1;
   5583 		iovp = iov;
   5584 		iovp->iov_base = args->data_val;
   5585 		iovp->iov_len = args->data_len;
   5586 	}
   5587 
   5588 	uio.uio_iov = iovp;
   5589 	uio.uio_iovcnt = iovcnt;
   5590 
   5591 	uio.uio_segflg = UIO_SYSSPACE;
   5592 	uio.uio_extflg = UIO_COPY_DEFAULT;
   5593 	uio.uio_loffset = args->offset;
   5594 	uio.uio_resid = args->data_len;
   5595 	uio.uio_llimit = curproc->p_fsz_ctl;
   5596 	rlimit = uio.uio_llimit - args->offset;
   5597 	if (rlimit < (u_offset_t)uio.uio_resid)
   5598 		uio.uio_resid = (int)rlimit;
   5599 
   5600 	if (args->stable == UNSTABLE4)
   5601 		ioflag = 0;
   5602 	else if (args->stable == FILE_SYNC4)
   5603 		ioflag = FSYNC;
   5604 	else if (args->stable == DATA_SYNC4)
   5605 		ioflag = FDSYNC;
   5606 	else {
   5607 		if (iovp != iov)
   5608 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
   5609 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   5610 		goto out;
   5611 	}
   5612 
   5613 	/*
   5614 	 * We're changing creds because VM may fault and we need
   5615 	 * the cred of the current thread to be used if quota
   5616 	 * checking is enabled.
   5617 	 */
   5618 	savecred = curthread->t_cred;
   5619 	curthread->t_cred = cr;
   5620 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
   5621 	curthread->t_cred = savecred;
   5622 
   5623 	if (iovp != iov)
   5624 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
   5625 
   5626 	if (error) {
   5627 		*cs->statusp = resp->status = puterrno4(error);
   5628 		goto out;
   5629 	}
   5630 
   5631 	*cs->statusp = resp->status = NFS4_OK;
   5632 	resp->count = args->data_len - uio.uio_resid;
   5633 
   5634 	if (ioflag == 0)
   5635 		resp->committed = UNSTABLE4;
   5636 	else
   5637 		resp->committed = FILE_SYNC4;
   5638 
   5639 	resp->writeverf = Write4verf;
   5640 
   5641 out:
   5642 	if (in_crit)
   5643 		nbl_end_crit(vp);
   5644 
   5645 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
   5646 	    WRITE4res *, resp);
   5647 }
   5648 
   5649 
   5650 /* XXX put in a header file */
   5651 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
   5652 
        /*
         * Process one NFSv4 COMPOUND request.
         *
         * The request tag is copied into the reply.  A request whose
         * minorversion is not NFS4_MINORVERSION is answered with
         * NFS4ERR_MINOR_VERS_MISMATCH and an empty result array.  Otherwise a
         * credential is obtained with sec_svc_getcred() and each operation in
         * args->array is dispatched through rfsv4disptab[], in order, while
         * holding exported_lock as a reader.  Processing stops at the first
         * operation whose status is not NFS4_OK, and the result array is then
         * shrunk to the operations actually executed.
         *
         * exi and cr are expected to be NULL on entry (asserted below); the
         * credential used is allocated and freed here.
         *
         * If rv is not NULL, *rv is set to 0 on entry and to 1 when the
         * credential could not be obtained, in which case svcerr_badcred()
         * has been called.
         *
         * Before returning on the normal path, the vnodes, saved filehandle
         * buffer and credentials held in the compound state are released and
         * req->rq_label is freed.
         */
   5653 void
   5654 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
   5655     struct svc_req *req, cred_t *cr, int *rv)
   5656 {
   5657 	uint_t i;
   5658 	struct compound_state cs;
   5659 
   5660 	if (rv != NULL)
   5661 		*rv = 0;
   5662 	rfs4_init_compound_state(&cs);
   5663 	/*
   5664 	 * Form a reply tag by copying over the request tag.
   5665 	 */
   5666 	resp->tag.utf8string_val =
   5667 	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
   5668 	resp->tag.utf8string_len = args->tag.utf8string_len;
   5669 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
   5670 	    resp->tag.utf8string_len);
   5671 
        	/* operations report their status straight into resp->status */
   5672 	cs.statusp = &resp->status;
   5673 	cs.req = req;
   5674 
   5675 	/*
   5676 	 * XXX for now, minorversion should be zero
   5677 	 */
   5678 	if (args->minorversion != NFS4_MINORVERSION) {
   5679 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
   5680 		    &cs, COMPOUND4args *, args);
   5681 		resp->array_len = 0;
   5682 		resp->array = NULL;
   5683 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
   5684 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
   5685 		    &cs, COMPOUND4res *, resp);
   5686 		return;
   5687 	}
   5688 
   5689 	ASSERT(exi == NULL);
   5690 	ASSERT(cr == NULL);
   5691 
   5692 	cr = crget();
   5693 	ASSERT(cr != NULL);
   5694 
   5695 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
   5696 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
   5697 		    &cs, COMPOUND4args *, args);
   5698 		crfree(cr);
   5699 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
   5700 		    &cs, COMPOUND4res *, resp);
   5701 		svcerr_badcred(req->rq_xprt);
   5702 		if (rv != NULL)
   5703 			*rv = 1;
   5704 		return;
   5705 	}
        	/* one zeroed result slot per requested operation */
   5706 	resp->array_len = args->array_len;
   5707 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
   5708 	    KM_SLEEP);
   5709 
   5710 	cs.basecr = cr;
   5711 
   5712 	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
   5713 	    COMPOUND4args *, args);
   5714 
   5715 	/*
   5716 	 * For now, NFS4 compound processing must be protected by
   5717 	 * exported_lock because it can access more than one exportinfo
   5718 	 * per compound and share/unshare can now change multiple
   5719 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
   5720 	 * per proc (excluding public exinfo), and exi_count design
   5721 	 * is sufficient to protect concurrent execution of NFS2/3
   5722 	 * ops along with unexport.  This lock will be removed as
   5723 	 * part of the NFSv4 phase 2 namespace redesign work.
   5724 	 */
   5725 	rw_enter(&exported_lock, RW_READER);
   5726 
   5727 	/*
   5728 	 * If this is the first compound we've seen, we need to start all
   5729 	 * new instances' grace periods.
   5730 	 */
   5731 	if (rfs4_seen_first_compound == 0) {
   5732 		rfs4_grace_start_new();
   5733 		/*
   5734 		 * This must be set after rfs4_grace_start_new(), otherwise
   5735 		 * another thread could proceed past here before the former
   5736 		 * is finished.
   5737 		 */
   5738 		rfs4_seen_first_compound = 1;
   5739 	}
   5740 
        	/* run the operations in order until one of them fails */
   5741 	for (i = 0; i < args->array_len && cs.cont; i++) {
   5742 		nfs_argop4 *argop;
   5743 		nfs_resop4 *resop;
   5744 		uint_t op;
   5745 
   5746 		argop = &args->array[i];
   5747 		resop = &resp->array[i];
   5748 		resop->resop = argop->argop;
   5749 		op = (uint_t)resop->resop;
   5750 
   5751 		if (op < rfsv4disp_cnt) {
   5752 			/*
   5753 			 * Count the individual ops here; NULL and COMPOUND
   5754 			 * are counted in common_dispatch()
   5755 			 */
   5756 			rfsproccnt_v4_ptr[op].value.ui64++;
   5757 
   5758 			NFS4_DEBUG(rfs4_debug > 1,
   5759 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
   5760 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
   5761 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
   5762 			    rfs4_op_string[op], *cs.statusp));
   5763 			if (*cs.statusp != NFS4_OK)
   5764 				cs.cont = FALSE;
   5765 		} else {
   5766 			/*
   5767 			 * This is effectively dead code since XDR code
   5768 			 * will have already returned BADXDR if op doesn't
   5769 			 * decode to legal value.  This only done for a
   5770 			 * day when XDR code doesn't verify v4 opcodes.
   5771 			 */
   5772 			op = OP_ILLEGAL;
   5773 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
   5774 
   5775 			rfs4_op_illegal(argop, resop, req, &cs);
   5776 			cs.cont = FALSE;
   5777 		}
   5778 
   5779 		/*
   5780 		 * If not at last op, and if we are to stop, then
   5781 		 * compact the results array.
   5782 		 */
   5783 		if ((i + 1) < args->array_len && !cs.cont) {
   5784 			nfs_resop4 *new_res = kmem_alloc(
   5785 			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
   5786 			bcopy(resp->array,
   5787 			    new_res, (i+1) * sizeof (nfs_resop4));
   5788 			kmem_free(resp->array,
   5789 			    args->array_len * sizeof (nfs_resop4));
   5790 
   5791 			resp->array_len =  i + 1;
   5792 			resp->array = new_res;
   5793 		}
   5794 	}
   5795 
   5796 	rw_exit(&exported_lock);
   5797 
   5798 	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
   5799 	    COMPOUND4res *, resp);
   5800 
        	/* drop everything the compound state still holds */
   5801 	if (cs.vp)
   5802 		VN_RELE(cs.vp);
   5803 	if (cs.saved_vp)
   5804 		VN_RELE(cs.saved_vp);
   5805 	if (cs.saved_fh.nfs_fh4_val)
   5806 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
   5807 
   5808 	if (cs.basecr)
   5809 		crfree(cs.basecr);
   5810 	if (cs.cr)
   5811 		crfree(cs.cr);
   5812 	/*
   5813 	 * done with this compound request, free the label
   5814 	 */
   5815 
   5816 	if (req->rq_label != NULL) {
   5817 		kmem_free(req->rq_label, sizeof (bslabel_t));
   5818 		req->rq_label = NULL;
   5819 	}
   5820 }
   5821 
   5822 /*
   5823  * XXX because of what appears to be duplicate calls to rfs4_compound_free
   5824  * XXX zero out the tag and array values. Need to investigate why the
   5825  * XXX calls occur, but at least prevent the panic for now.
   5826  */
   5827 void
   5828 rfs4_compound_free(COMPOUND4res *resp)
   5829 {
   5830 	uint_t i;
   5831 
   5832 	if (resp->tag.utf8string_val) {
   5833 		UTF8STRING_FREE(resp->tag)
   5834 	}
   5835 
   5836 	for (i = 0; i < resp->array_len; i++) {
   5837 		nfs_resop4 *resop;
   5838 		uint_t op;
   5839 
   5840 		resop = &resp->array[i];
   5841 		op = (uint_t)resop->resop;
   5842 		if (op < rfsv4disp_cnt) {
   5843 			(*rfsv4disptab[op].dis_resfree)(resop);
   5844 		}
   5845 	}
   5846 	if (resp->array != NULL) {
   5847 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
   5848 	}
   5849 }
   5850 
   5851 /*
   5852  * Process the value of the compound request rpc flags, as a bit-AND
   5853  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
   5854  */
   5855 void
   5856 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
   5857 {
   5858 	int i;
   5859 	int flag = RPC_ALL;
   5860 
   5861 	for (i = 0; flag && i < args->array_len; i++) {
   5862 		uint_t op;
   5863 
   5864 		op = (uint_t)args->array[i].argop;
   5865 
   5866 		if (op < rfsv4disp_cnt)
   5867 			flag &= rfsv4disptab[op].dis_flags;
   5868 		else
   5869 			flag = 0;
   5870 	}
   5871 	*flagp = flag;
   5872 }
   5873 
   5874 nfsstat4
   5875 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
   5876 {
   5877 	nfsstat4 e;
   5878 
   5879 	rfs4_dbe_lock(cp->rc_dbe);
   5880 
   5881 	if (cp->rc_sysidt != LM_NOSYSID) {
   5882 		*sp = cp->rc_sysidt;
   5883 		e = NFS4_OK;
   5884 
   5885 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
   5886 		*sp = cp->rc_sysidt;
   5887 		e = NFS4_OK;
   5888 
   5889 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
   5890 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
   5891 	} else
   5892 		e = NFS4ERR_DELAY;
   5893 
   5894 	rfs4_dbe_unlock(cp->rc_dbe);
   5895 	return (e);
   5896 }
   5897 
   5898 #if defined(DEBUG) && ! defined(lint)
   5899 static void lock_print(char *str, int operation, struct flock64 *flk)
   5900 {
   5901 	char *op, *type;
   5902 
   5903 	switch (operation) {
   5904 	case F_GETLK: op = "F_GETLK";
   5905 		break;
   5906 	case F_SETLK: op = "F_SETLK";
   5907 		break;
   5908 	case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
   5909 		break;
   5910 	default: op = "F_UNKNOWN";
   5911 		break;
   5912 	}
   5913 	switch (flk->l_type) {
   5914 	case F_UNLCK: type = "F_UNLCK";
   5915 		break;
   5916 	case F_RDLCK: type = "F_RDLCK";
   5917 		break;
   5918 	case F_WRLCK: type = "F_WRLCK";
   5919 		break;
   5920 	default: type = "F_UNKNOWN";
   5921 		break;
   5922 	}
   5923 
   5924 	ASSERT(flk->l_whence == 0);
   5925 	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
   5926 	    str, op, type, (longlong_t)flk->l_start,
   5927 	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
   5928 }
   5929 
   5930 #define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
   5931 #else
   5932 #define	LOCK_PRINT(d, s, t, f)
   5933 #endif
   5934 
   5935 /*ARGSUSED*/
   5936 static bool_t
   5937 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
   5938 {
   5939 	return (TRUE);
   5940 }
   5941 
   5942 /*
   5943  * Look up the pathname using the vp in cs as the directory vnode.
   5944  * cs->vp will be the vnode for the file on success
   5945  */
   5946 
   5947 static nfsstat4
   5948 rfs4_lookup(component4 *component, struct svc_req *req,
   5949     struct compound_state *cs)
   5950 {
   5951 	char *nm;
   5952 	uint32_t len;
   5953 	nfsstat4 status;
   5954 
   5955 	if (cs->vp == NULL) {
   5956 		return (NFS4ERR_NOFILEHANDLE);
   5957 	}
   5958 	if (cs->vp->v_type != VDIR) {
   5959 		return (NFS4ERR_NOTDIR);
   5960 	}
   5961 
   5962 	if (!utf8_dir_verify(component))
   5963 		return (NFS4ERR_INVAL);
   5964 
   5965 	nm = utf8_to_fn(component, &len, NULL);
   5966 	if (nm == NULL) {
   5967 		return (NFS4ERR_INVAL);
   5968 	}
   5969 
   5970 	if (len > MAXNAMELEN) {
   5971 		kmem_free(nm, len);
   5972 		return (NFS4ERR_NAMETOOLONG);
   5973 	}
   5974 
   5975 	status = do_rfs4_op_lookup(nm, len, req, cs);
   5976 
   5977 	kmem_free(nm, len);
   5978 
   5979 	return (status);
   5980 }
   5981 
   5982 static nfsstat4
   5983 rfs4_lookupfile(component4 *component, struct svc_req *req,
   5984     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
   5985 {
   5986 	nfsstat4 status;
   5987 	vnode_t *dvp = cs->vp;
   5988 	vattr_t bva, ava, fva;
   5989 	int error;
   5990 
   5991 	/* Get "before" change value */
   5992 	bva.va_mask = AT_CTIME|AT_SEQ;
   5993 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   5994 	if (error)
   5995 		return (puterrno4(error));
   5996 
   5997 	/* rfs4_lookup may VN_RELE directory */
   5998 	VN_HOLD(dvp);
   5999 
   6000 	status = rfs4_lookup(component, req, cs);
   6001 	if (status != NFS4_OK) {
   6002 		VN_RELE(dvp);
   6003 		return (status);
   6004 	}
   6005 
   6006 	/*
   6007 	 * Get "after" change value, if it fails, simply return the
   6008 	 * before value.
   6009 	 */
   6010 	ava.va_mask = AT_CTIME|AT_SEQ;
   6011 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   6012 		ava.va_ctime = bva.va_ctime;
   6013 		ava.va_seq = 0;
   6014 	}
   6015 	VN_RELE(dvp);
   6016 
   6017 	/*
   6018 	 * Validate the file is a file
   6019 	 */
   6020 	fva.va_mask = AT_TYPE|AT_MODE;
   6021 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
   6022 	if (error)
   6023 		return (puterrno4(error));
   6024 
   6025 	if (fva.va_type != VREG) {
   6026 		if (fva.va_type == VDIR)
   6027 			return (NFS4ERR_ISDIR);
   6028 		if (fva.va_type == VLNK)
   6029 			return (NFS4ERR_SYMLINK);
   6030 		return (NFS4ERR_INVAL);
   6031 	}
   6032 
   6033 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
   6034 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   6035 
   6036 	/*
   6037 	 * It is undefined if VOP_LOOKUP will change va_seq, so
   6038 	 * cinfo.atomic = TRUE only if we have
   6039 	 * non-zero va_seq's, and they have not changed.
   6040 	 */
   6041 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
   6042 		cinfo->atomic = TRUE;
   6043 	else
   6044 		cinfo->atomic = FALSE;
   6045 
   6046 	/* Check for mandatory locking */
   6047 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
   6048 	return (check_open_access(access, cs, req));
   6049 }
   6050 
   6051 static nfsstat4
   6052 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
   6053     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
   6054 {
   6055 	int error;
   6056 	nfsstat4 status = NFS4_OK;
   6057 	vattr_t va;
   6058 
   6059 tryagain:
   6060 
   6061 	/*
   6062 	 * The file open mode used is VWRITE.  If the client needs
   6063 	 * some other semantic, then it should do the access checking
   6064 	 * itself.  It would have been nice to have the file open mode
   6065 	 * passed as part of the arguments.
   6066 	 */
   6067 
   6068 	*created = TRUE;
   6069 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
   6070 
   6071 	if (error) {
   6072 		*created = FALSE;
   6073 
   6074 		/*
   6075 		 * If we got something other than file already exists
   6076 		 * then just return this error.  Otherwise, we got
   6077 		 * EEXIST.  If we were doing a GUARDED create, then
   6078 		 * just return this error.  Otherwise, we need to
   6079 		 * make sure that this wasn't a duplicate of an
   6080 		 * exclusive create request.
   6081 		 *
   6082 		 * The assumption is made that a non-exclusive create
   6083 		 * request will never return EEXIST.
   6084 		 */
   6085 
   6086 		if (error != EEXIST || mode == GUARDED4) {
   6087 			status = puterrno4(error);
   6088 			return (status);
   6089 		}
   6090 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
   6091 		    NULL, NULL, NULL);
   6092 
   6093 		if (error) {
   6094 			/*
   6095 			 * We couldn't find the file that we thought that
   6096 			 * we just created.  So, we'll just try creating
   6097 			 * it again.
   6098 			 */
   6099 			if (error == ENOENT)
   6100 				goto tryagain;
   6101 
   6102 			status = puterrno4(error);
   6103 			return (status);
   6104 		}
   6105 
   6106 		if (mode == UNCHECKED4) {
   6107 			/* existing object must be regular file */
   6108 			if ((*vpp)->v_type != VREG) {
   6109 				if ((*vpp)->v_type == VDIR)
   6110 					status = NFS4ERR_ISDIR;
   6111 				else if ((*vpp)->v_type == VLNK)
   6112 					status = NFS4ERR_SYMLINK;
   6113 				else
   6114 					status = NFS4ERR_INVAL;
   6115 				VN_RELE(*vpp);
   6116 				return (status);
   6117 			}
   6118 
   6119 			return (NFS4_OK);
   6120 		}
   6121 
   6122 		/* Check for duplicate request */
   6123 		ASSERT(mtime != 0);
   6124 		va.va_mask = AT_MTIME;
   6125 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
   6126 		if (!error) {
   6127 			/* We found the file */
   6128 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
   6129 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
   6130 				/* but its not our creation */
   6131 				VN_RELE(*vpp);
   6132 				return (NFS4ERR_EXIST);
   6133 			}
   6134 			*created = TRUE; /* retrans of create == created */
   6135 			return (NFS4_OK);
   6136 		}
   6137 		VN_RELE(*vpp);
   6138 		return (NFS4ERR_EXIST);
   6139 	}
   6140 
   6141 	return (NFS4_OK);
   6142 }
   6143 
   6144 static nfsstat4
   6145 check_open_access(uint32_t access, struct compound_state *cs,
   6146     struct svc_req *req)
   6147 {
   6148 	int error;
   6149 	vnode_t *vp;
   6150 	bool_t readonly;
   6151 	cred_t *cr = cs->cr;
   6152 
   6153 	/* For now we don't allow mandatory locking as per V2/V3 */
   6154 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
   6155 		return (NFS4ERR_ACCESS);
   6156 	}
   6157 
   6158 	vp = cs->vp;
   6159 	ASSERT(cr != NULL && vp->v_type == VREG);
   6160 
   6161 	/*
   6162 	 * If the file system is exported read only and we are trying
   6163 	 * to open for write, then return NFS4ERR_ROFS
   6164 	 */
   6165 
   6166 	readonly = rdonly4(cs->exi, cs->vp, req);
   6167 
   6168 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
   6169 		return (NFS4ERR_ROFS);
   6170 
   6171 	if (access & OPEN4_SHARE_ACCESS_READ) {
   6172 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
   6173 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
   6174 			return (NFS4ERR_ACCESS);
   6175 		}
   6176 	}
   6177 
   6178 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
   6179 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   6180 		if (error)
   6181 			return (NFS4ERR_ACCESS);
   6182 	}
   6183 
   6184 	return (NFS4_OK);
   6185 }
   6186 
   6187 static nfsstat4
   6188 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
   6189     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
   6190 {
   6191 	struct nfs4_svgetit_arg sarg;
   6192 	struct nfs4_ntov_table ntov;
   6193 
   6194 	bool_t ntov_table_init = FALSE;
   6195 	struct statvfs64 sb;
   6196 	nfsstat4 status;
   6197 	vnode_t *vp;
   6198 	vattr_t bva, ava, iva, cva, *vap;
   6199 	vnode_t *dvp;
   6200 	timespec32_t *mtime;
   6201 	char *nm = NULL;
   6202 	uint_t buflen;
   6203 	bool_t created;
   6204 	bool_t setsize = FALSE;
   6205 	len_t reqsize;
   6206 	int error;
   6207 	bool_t trunc;
   6208 	caller_context_t ct;
   6209 	component4 *component;
   6210 	bslabel_t *clabel;
   6211 	struct sockaddr *ca;
   6212 	char *name = NULL;
   6213 
   6214 	sarg.sbp = &sb;
   6215 
   6216 	dvp = cs->vp;
   6217 
   6218 	/* Check if the file system is read only */
   6219 	if (rdonly4(cs->exi, dvp, req))
   6220 		return (NFS4ERR_ROFS);
   6221 
   6222 	/* check the label of including directory */
   6223 	if (is_system_labeled()) {
   6224 		ASSERT(req->rq_label != NULL);
   6225 		clabel = req->rq_label;
   6226 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   6227 		    "got client label from request(1)",
   6228 		    struct svc_req *, req);
   6229 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   6230 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
   6231 			    cs->exi)) {
   6232 				return (NFS4ERR_ACCESS);
   6233 			}
   6234 		}
   6235 	}
   6236 
   6237 	/*
   6238 	 * Get the last component of path name in nm. cs will reference
   6239 	 * the including directory on success.
   6240 	 */
   6241 	component = &args->open_claim4_u.file;
   6242 	if (!utf8_dir_verify(component))
   6243 		return (NFS4ERR_INVAL);
   6244 
   6245 	nm = utf8_to_fn(component, &buflen, NULL);
   6246 
   6247 	if (nm == NULL)
   6248 		return (NFS4ERR_RESOURCE);
   6249 
   6250 	if (buflen > MAXNAMELEN) {
   6251 		kmem_free(nm, buflen);
   6252 		return (NFS4ERR_NAMETOOLONG);
   6253 	}
   6254 
   6255 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
   6256 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   6257 	if (error) {
   6258 		kmem_free(nm, buflen);
   6259 		return (puterrno4(error));
   6260 	}
   6261 
   6262 	if (bva.va_type != VDIR) {
   6263 		kmem_free(nm, buflen);
   6264 		return (NFS4ERR_NOTDIR);
   6265 	}
   6266 
   6267 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
   6268 
   6269 	switch (args->mode) {
   6270 	case GUARDED4:
   6271 		/*FALLTHROUGH*/
   6272 	case UNCHECKED4:
   6273 		nfs4_ntov_table_init(&ntov);
   6274 		ntov_table_init = TRUE;
   6275 
   6276 		*attrset = 0;
   6277 		status = do_rfs4_set_attrs(attrset,
   6278 		    &args->createhow4_u.createattrs,
   6279 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
   6280 
   6281 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
   6282 		    sarg.vap->va_type != VREG) {
   6283 			if (sarg.vap->va_type == VDIR)
   6284 				status = NFS4ERR_ISDIR;
   6285 			else if (sarg.vap->va_type == VLNK)
   6286 				status = NFS4ERR_SYMLINK;
   6287 			else
   6288 				status = NFS4ERR_INVAL;
   6289 		}
   6290 
   6291 		if (status != NFS4_OK) {
   6292 			kmem_free(nm, buflen);
   6293 			nfs4_ntov_table_free(&ntov, &sarg);
   6294 			*attrset = 0;
   6295 			return (status);
   6296 		}
   6297 
   6298 		vap = sarg.vap;
   6299 		vap->va_type = VREG;
   6300 		vap->va_mask |= AT_TYPE;
   6301 
   6302 		if ((vap->va_mask & AT_MODE) == 0) {
   6303 			vap->va_mask |= AT_MODE;
   6304 			vap->va_mode = (mode_t)0600;
   6305 		}
   6306 
   6307 		if (vap->va_mask & AT_SIZE) {
   6308 
   6309 			/* Disallow create with a non-zero size */
   6310 
   6311 			if ((reqsize = sarg.vap->va_size) != 0) {
   6312 				kmem_free(nm, buflen);
   6313 				nfs4_ntov_table_free(&ntov, &sarg);
   6314 				*attrset = 0;
   6315 				return (NFS4ERR_INVAL);
   6316 			}
   6317 			setsize = TRUE;
   6318 		}
   6319 		break;
   6320 
   6321 	case EXCLUSIVE4:
   6322 		/* prohibit EXCL create of named attributes */
   6323 		if (dvp->v_flag & V_XATTRDIR) {
   6324 			kmem_free(nm, buflen);
   6325 			*attrset = 0;
   6326 			return (NFS4ERR_INVAL);
   6327 		}
   6328 
   6329 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
   6330 		cva.va_type = VREG;
   6331 		/*
   6332 		 * Ensure no time overflows. Assumes underlying
   6333 		 * filesystem supports at least 32 bits.
   6334 		 * Truncate nsec to usec resolution to allow valid
   6335 		 * compares even if the underlying filesystem truncates.
   6336 		 */
   6337 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
   6338 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
   6339 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
   6340 		cva.va_mode = (mode_t)0;
   6341 		vap = &cva;
   6342 
   6343 		/*
   6344 		 * For EXCL create, attrset is set to the server attr
   6345 		 * used to cache the client's verifier.
   6346 		 */
   6347 		*attrset = FATTR4_TIME_MODIFY_MASK;
   6348 		break;
   6349 	}
   6350 
   6351 	/* If necessary, convert to UTF-8 for illbehaved clients */
   6352 
   6353 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   6354 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   6355 	    MAXPATHLEN  + 1);
   6356 
   6357 	if (name == NULL) {
   6358 		kmem_free(nm, buflen);
   6359 		return (NFS4ERR_SERVERFAULT);
   6360 	}
   6361 
   6362 	status = create_vnode(dvp, name, vap, args->mode, mtime,
   6363 	    cs->cr, &vp, &created);
   6364 	if (nm != name)
   6365 		kmem_free(name, MAXPATHLEN + 1);
   6366 	kmem_free(nm, buflen);
   6367 
   6368 	if (status != NFS4_OK) {
   6369 		if (ntov_table_init)
   6370 			nfs4_ntov_table_free(&ntov, &sarg);
   6371 		*attrset = 0;
   6372 		return (status);
   6373 	}
   6374 
   6375 	trunc = (setsize && !created);
   6376 
   6377 	if (args->mode != EXCLUSIVE4) {
   6378 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
   6379 
   6380 		/*
   6381 		 * True verification that object was created with correct
   6382 		 * attrs is impossible.  The attrs could have been changed
   6383 		 * immediately after object creation.  If attributes did
   6384 		 * not verify, the only recourse for the server is to
   6385 		 * destroy the object.  Maybe if some attrs (like gid)
   6386 		 * are set incorrectly, the object should be destroyed;
   6387 		 * however, seems bad as a default policy.  Do we really
   6388 		 * want to destroy an object over one of the times not
   6389 		 * verifying correctly?  For these reasons, the server
   6390 		 * currently sets bits in attrset for createattrs
   6391 		 * that were set; however, no verification is done.
   6392 		 *
   6393 		 * vmask_to_nmask accounts for vattr bits set on create
   6394 		 *	[do_rfs4_set_attrs() only sets resp bits for
   6395 		 *	 non-vattr/vfs bits.]
   6396 		 * Mask off any bits we set by default so as not to return
   6397 		 * more attrset bits than were requested in createattrs
   6398 		 */
   6399 		if (created) {
   6400 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
   6401 			*attrset &= createmask;
   6402 		} else {
   6403 			/*
   6404 			 * We did not create the vnode (we tried but it
   6405 			 * already existed).  In this case, the only createattr
   6406 			 * that the spec allows the server to set is size,
   6407 			 * and even then, it can only be set if it is 0.
   6408 			 */
   6409 			*attrset = 0;
   6410 			if (trunc)
   6411 				*attrset = FATTR4_SIZE_MASK;
   6412 		}
   6413 	}
   6414 	if (ntov_table_init)
   6415 		nfs4_ntov_table_free(&ntov, &sarg);
   6416 
   6417 	/*
   6418 	 * Get the initial "after" sequence number, if it fails,
   6419 	 * set to zero, time to before.
   6420 	 */
   6421 	iva.va_mask = AT_CTIME|AT_SEQ;
   6422 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
   6423 		iva.va_seq = 0;
   6424 		iva.va_ctime = bva.va_ctime;
   6425 	}
   6426 
   6427 	/*
   6428 	 * create_vnode attempts to create the file exclusive,
   6429 	 * if it already exists the VOP_CREATE will fail and
   6430 	 * may not increase va_seq. It is atomic if
   6431 	 * we haven't changed the directory, but if it has changed
   6432 	 * we don't know what changed it.
   6433 	 */
   6434 	if (!created) {
   6435 		if (bva.va_seq && iva.va_seq &&
   6436 		    bva.va_seq == iva.va_seq)
   6437 			cinfo->atomic = TRUE;
   6438 		else
   6439 			cinfo->atomic = FALSE;
   6440 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
   6441 	} else {
   6442 		/*
   6443 		 * The entry was created, we need to sync the
   6444 		 * directory metadata.
   6445 		 */
   6446 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   6447 
   6448 		/*
   6449 		 * Get "after" change value, if it fails, simply return the
   6450 		 * before value.
   6451 		 */
   6452 		ava.va_mask = AT_CTIME|AT_SEQ;
   6453 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   6454 			ava.va_ctime = bva.va_ctime;
   6455 			ava.va_seq = 0;
   6456 		}
   6457 
   6458 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   6459 
   6460 		/*
   6461 		 * The cinfo->atomic = TRUE only if we have
   6462 		 * non-zero va_seq's, and it has incremented by exactly one
   6463 		 * during the create_vnode and it didn't
   6464 		 * change during the VOP_FSYNC.
   6465 		 */
   6466 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
   6467 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
   6468 			cinfo->atomic = TRUE;
   6469 		else
   6470 			cinfo->atomic = FALSE;
   6471 	}
   6472 
   6473 	/* Check for mandatory locking and that the size gets set. */
   6474 	cva.va_mask = AT_MODE;
   6475 	if (setsize)
   6476 		cva.va_mask |= AT_SIZE;
   6477 
   6478 	/* Assume the worst */
   6479 	cs->mandlock = TRUE;
   6480 
   6481 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
   6482 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
   6483 
   6484 		/*
   6485 		 * Truncate the file if necessary; this would be
   6486 		 * the case for create over an existing file.
   6487 		 */
   6488 
   6489 		if (trunc) {
   6490 			int in_crit = 0;
   6491 			rfs4_file_t *fp;
   6492 			bool_t create = FALSE;
   6493 
   6494 			/*
   6495 			 * We are writing over an existing file.
   6496 			 * Check to see if we need to recall a delegation.
   6497 			 */
   6498 			rfs4_hold_deleg_policy();
   6499 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
   6500 				if (rfs4_check_delegated_byfp(FWRITE, fp,
   6501 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
   6502 					rfs4_file_rele(fp);
   6503 					rfs4_rele_deleg_policy();
   6504 					VN_RELE(vp);
   6505 					*attrset = 0;
   6506 					return (NFS4ERR_DELAY);
   6507 				}
   6508 				rfs4_file_rele(fp);
   6509 			}
   6510 			rfs4_rele_deleg_policy();
   6511 
   6512 			if (nbl_need_check(vp)) {
   6513 				in_crit = 1;
   6514 
   6515 				ASSERT(reqsize == 0);
   6516 
   6517 				nbl_start_crit(vp, RW_READER);
   6518 				if (nbl_conflict(vp, NBL_WRITE, 0,
   6519 				    cva.va_size, 0, NULL)) {
   6520 					in_crit = 0;
   6521 					nbl_end_crit(vp);
   6522 					VN_RELE(vp);
   6523 					*attrset = 0;
   6524 					return (NFS4ERR_ACCESS);
   6525 				}
   6526 			}
   6527 			ct.cc_sysid = 0;
   6528 			ct.cc_pid = 0;
   6529 			ct.cc_caller_id = nfs4_srv_caller_id;
   6530 			ct.cc_flags = CC_DONTBLOCK;
   6531 
   6532 			cva.va_mask = AT_SIZE;
   6533 			cva.va_size = reqsize;
   6534 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
   6535 			if (in_crit)
   6536 				nbl_end_crit(vp);
   6537 		}
   6538 	}
   6539 
   6540 	error = makefh4(&cs->fh, vp, cs->exi);
   6541 
   6542 	/*
   6543 	 * Force modified data and metadata out to stable storage.
   6544 	 */
   6545 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
   6546 
   6547 	if (error) {
   6548 		VN_RELE(vp);
   6549 		*attrset = 0;
   6550 		return (puterrno4(error));
   6551 	}
   6552 
   6553 	/* if parent dir is attrdir, set namedattr fh flag */
   6554 	if (dvp->v_flag & V_XATTRDIR)
   6555 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
   6556 
   6557 	if (cs->vp)
   6558 		VN_RELE(cs->vp);
   6559 
   6560 	cs->vp = vp;
   6561 
   6562 	/*
   6563 	 * if we did not create the file, we will need to check
   6564 	 * the access bits on the file
   6565 	 */
   6566 
   6567 	if (!created) {
   6568 		if (setsize)
   6569 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
   6570 		status = check_open_access(args->share_access, cs, req);
   6571 		if (status != NFS4_OK)
   6572 			*attrset = 0;
   6573 	}
   6574 	return (status);
   6575 }
   6576 
   6577 /*ARGSUSED*/
   6578 static void
   6579 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
   6580     rfs4_openowner_t *oo, delegreq_t deleg,
   6581     uint32_t access, uint32_t deny,
   6582     OPEN4res *resp, int deleg_cur)
   6583 {
   6584 	/* XXX Currently not using req  */
   6585 	rfs4_state_t *sp;
   6586 	rfs4_file_t *fp;
   6587 	bool_t screate = TRUE;
   6588 	bool_t fcreate = TRUE;
   6589 	uint32_t open_a, share_a;
   6590 	uint32_t open_d, share_d;
   6591 	rfs4_deleg_state_t *dsp;
   6592 	sysid_t sysid;
   6593 	nfsstat4 status;
   6594 	caller_context_t ct;
   6595 	int fflags = 0;
   6596 	int recall = 0;
   6597 	int err;
   6598 	int first_open;
   6599 
   6600 	/* get the file struct and hold a lock on it during initial open */
   6601 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
   6602 	if (fp == NULL) {
   6603 		resp->status = NFS4ERR_RESOURCE;
   6604 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
   6605 		return;
   6606 	}
   6607 
   6608 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
   6609 	if (sp == NULL) {
   6610 		resp->status = NFS4ERR_RESOURCE;
   6611 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
   6612 		/* No need to keep any reference */
   6613 		rw_exit(&fp->rf_file_rwlock);
   6614 		rfs4_file_rele(fp);
   6615 		return;
   6616 	}
   6617 
   6618 	/* try to get the sysid before continuing */
   6619 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
   6620 		resp->status = status;
   6621 		rfs4_file_rele(fp);
   6622 		/* Not a fully formed open; "close" it */
   6623 		if (screate == TRUE)
   6624 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6625 		rfs4_state_rele(sp);
   6626 		return;
   6627 	}
   6628 
   6629 	/* Calculate the fflags for this OPEN. */
   6630 	if (access & OPEN4_SHARE_ACCESS_READ)
   6631 		fflags |= FREAD;
   6632 	if (access & OPEN4_SHARE_ACCESS_WRITE)
   6633 		fflags |= FWRITE;
   6634 
   6635 	rfs4_dbe_lock(sp->rs_dbe);
   6636 
   6637 	/*
   6638 	 * Calculate the new deny and access mode that this open is adding to
   6639 	 * the file for this open owner;
   6640 	 */
   6641 	open_d = (deny & ~sp->rs_open_deny);
   6642 	open_a = (access & ~sp->rs_open_access);
   6643 
   6644 	/*
   6645 	 * Calculate the new share access and share deny modes that this open
   6646 	 * is adding to the file for this open owner;
   6647 	 */
   6648 	share_a = (access & ~sp->rs_share_access);
   6649 	share_d = (deny & ~sp->rs_share_deny);
   6650 
   6651 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
   6652 
   6653 	/*
   6654 	 * Check to see the client has already sent an open for this
   6655 	 * open owner on this file with the same share/deny modes.
   6656 	 * If so, we don't need to check for a conflict and we don't
   6657 	 * need to add another shrlock.  If not, then we need to
   6658 	 * check for conflicts in deny and access before checking for
   6659 	 * conflicts in delegation.  We don't want to recall a
   6660 	 * delegation based on an open that will eventually fail based
   6661 	 * on shares modes.
   6662 	 */
   6663 
   6664 	if (share_a || share_d) {
   6665 		if ((err = rfs4_share(sp, access, deny)) != 0) {
   6666 			rfs4_dbe_unlock(sp->rs_dbe);
   6667 			resp->status = err;
   6668 
   6669 			rfs4_file_rele(fp);
   6670 			/* Not a fully formed open; "close" it */
   6671 			if (screate == TRUE)
   6672 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6673 			rfs4_state_rele(sp);
   6674 			return;
   6675 		}
   6676 	}
   6677 
   6678 	rfs4_dbe_lock(fp->rf_dbe);
   6679 
   6680 	/*
   6681 	 * Check to see if this file is delegated and if so, if a
   6682 	 * recall needs to be done.
   6683 	 */
   6684 	if (rfs4_check_recall(sp, access)) {
   6685 		rfs4_dbe_unlock(fp->rf_dbe);
   6686 		rfs4_dbe_unlock(sp->rs_dbe);
   6687 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
   6688 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
   6689 		rfs4_dbe_lock(sp->rs_dbe);
   6690 
   6691 		/* if state closed while lock was dropped */
   6692 		if (sp->rs_closed) {
   6693 			if (share_a || share_d)
   6694 				(void) rfs4_unshare(sp);
   6695 			rfs4_dbe_unlock(sp->rs_dbe);
   6696 			rfs4_file_rele(fp);
   6697 			/* Not a fully formed open; "close" it */
   6698 			if (screate == TRUE)
   6699 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6700 			rfs4_state_rele(sp);
   6701 			resp->status = NFS4ERR_OLD_STATEID;
   6702 			return;
   6703 		}
   6704 
   6705 		rfs4_dbe_lock(fp->rf_dbe);
   6706 		/* Let's see if the delegation was returned */
   6707 		if (rfs4_check_recall(sp, access)) {
   6708 			rfs4_dbe_unlock(fp->rf_dbe);
   6709 			if (share_a || share_d)
   6710 				(void) rfs4_unshare(sp);
   6711 			rfs4_dbe_unlock(sp->rs_dbe);
   6712 			rfs4_file_rele(fp);
   6713 			rfs4_update_lease(sp->rs_owner->ro_client);
   6714 
   6715 			/* Not a fully formed open; "close" it */
   6716 			if (screate == TRUE)
   6717 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6718 			rfs4_state_rele(sp);
   6719 			resp->status = NFS4ERR_DELAY;
   6720 			return;
   6721 		}
   6722 	}
   6723 	/*
   6724 	 * the share check passed and any delegation conflict has been
   6725 	 * taken care of, now call vop_open.
   6726 	 * if this is the first open then call vop_open with fflags.
   6727 	 * if not, call vn_open_upgrade with just the upgrade flags.
   6728 	 *
   6729 	 * if the file has been opened already, it will have the current
   6730 	 * access mode in the state struct.  if it has no share access, then
   6731 	 * this is a new open.
   6732 	 *
   6733 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
   6734 	 * call VOP_OPEN(), just do the open upgrade.
   6735 	 */
   6736 	if (first_open && !deleg_cur) {
   6737 		ct.cc_sysid = sysid;
   6738 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
   6739 		ct.cc_caller_id = nfs4_srv_caller_id;
   6740 		ct.cc_flags = CC_DONTBLOCK;
   6741 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
   6742 		if (err) {
   6743 			rfs4_dbe_unlock(fp->rf_dbe);
   6744 			if (share_a || share_d)
   6745 				(void) rfs4_unshare(sp);
   6746 			rfs4_dbe_unlock(sp->rs_dbe);
   6747 			rfs4_file_rele(fp);
   6748 
   6749 			/* Not a fully formed open; "close" it */
   6750 			if (screate == TRUE)
   6751 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6752 			rfs4_state_rele(sp);
   6753 			/* check if a monitor detected a delegation conflict */
   6754 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
   6755 				resp->status = NFS4ERR_DELAY;
   6756 			else
   6757 				resp->status = NFS4ERR_SERVERFAULT;
   6758 			return;
   6759 		}
   6760 	} else { /* open upgrade */
   6761 		/*
   6762 		 * calculate the fflags for the new mode that is being added
   6763 		 * by this upgrade.
   6764 		 */
   6765 		fflags = 0;
   6766 		if (open_a & OPEN4_SHARE_ACCESS_READ)
   6767 			fflags |= FREAD;
   6768 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
   6769 			fflags |= FWRITE;
   6770 		vn_open_upgrade(cs->vp, fflags);
   6771 	}
   6772 	sp->rs_open_access |= access;
   6773 	sp->rs_open_deny |= deny;
   6774 
   6775 	if (open_d & OPEN4_SHARE_DENY_READ)
   6776 		fp->rf_deny_read++;
   6777 	if (open_d & OPEN4_SHARE_DENY_WRITE)
   6778 		fp->rf_deny_write++;
   6779 	fp->rf_share_deny |= deny;
   6780 
   6781 	if (open_a & OPEN4_SHARE_ACCESS_READ)
   6782 		fp->rf_access_read++;
   6783 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
   6784 		fp->rf_access_write++;
   6785 	fp->rf_share_access |= access;
   6786 
   6787 	/*
   6788 	 * Check for delegation here. if the deleg argument is not
   6789 	 * DELEG_ANY, then this is a reclaim from a client and
   6790 	 * we must honor the delegation requested. If necessary we can
   6791 	 * set the recall flag.
   6792 	 */
   6793 
   6794 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
   6795 
   6796 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
   6797 
   6798 	next_stateid(&sp->rs_stateid);
   6799 
   6800 	resp->stateid = sp->rs_stateid.stateid;
   6801 
   6802 	rfs4_dbe_unlock(fp->rf_dbe);
   6803 	rfs4_dbe_unlock(sp->rs_dbe);
   6804 
   6805 	if (dsp) {
   6806 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
   6807 		rfs4_deleg_state_rele(dsp);
   6808 	}
   6809 
   6810 	rfs4_file_rele(fp);
   6811 	rfs4_state_rele(sp);
   6812 
   6813 	resp->status = NFS4_OK;
   6814 }
   6815 
   6816 /*ARGSUSED*/
   6817 static void
   6818 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
   6819     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6820 {
   6821 	change_info4 *cinfo = &resp->cinfo;
   6822 	bitmap4 *attrset = &resp->attrset;
   6823 
   6824 	if (args->opentype == OPEN4_NOCREATE)
   6825 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
   6826 		    req, cs, args->share_access, cinfo);
   6827 	else {
   6828 		/* inhibit delegation grants during exclusive create */
   6829 
   6830 		if (args->mode == EXCLUSIVE4)
   6831 			rfs4_disable_delegation();
   6832 
   6833 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
   6834 		    oo->ro_client->rc_clientid);
   6835 	}
   6836 
   6837 	if (resp->status == NFS4_OK) {
   6838 
   6839 		/* cs->vp cs->fh now reference the desired file */
   6840 
   6841 		rfs4_do_open(cs, req, oo,
   6842 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
   6843 		    args->share_access, args->share_deny, resp, 0);
   6844 
   6845 		/*
   6846 		 * If rfs4_createfile set attrset, we must
   6847 		 * clear this attrset before the response is copied.
   6848 		 */
   6849 		if (resp->status != NFS4_OK && resp->attrset) {
   6850 			resp->attrset = 0;
   6851 		}
   6852 	}
   6853 	else
   6854 		*cs->statusp = resp->status;
   6855 
   6856 	if (args->mode == EXCLUSIVE4)
   6857 		rfs4_enable_delegation();
   6858 }
   6859 
   6860 /*ARGSUSED*/
   6861 static void
   6862 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
   6863     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6864 {
   6865 	change_info4 *cinfo = &resp->cinfo;
   6866 	vattr_t va;
   6867 	vtype_t v_type = cs->vp->v_type;
   6868 	int error = 0;
   6869 
   6870 	/* Verify that we have a regular file */
   6871 	if (v_type != VREG) {
   6872 		if (v_type == VDIR)
   6873 			resp->status = NFS4ERR_ISDIR;
   6874 		else if (v_type == VLNK)
   6875 			resp->status = NFS4ERR_SYMLINK;
   6876 		else
   6877 			resp->status = NFS4ERR_INVAL;
   6878 		return;
   6879 	}
   6880 
   6881 	va.va_mask = AT_MODE|AT_UID;
   6882 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
   6883 	if (error) {
   6884 		resp->status = puterrno4(error);
   6885 		return;
   6886 	}
   6887 
   6888 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
   6889 
   6890 	/*
   6891 	 * Check if we have access to the file, Note the the file
   6892 	 * could have originally been open UNCHECKED or GUARDED
   6893 	 * with mode bits that will now fail, but there is nothing
   6894 	 * we can really do about that except in the case that the
   6895 	 * owner of the file is the one requesting the open.
   6896 	 */
   6897 	if (crgetuid(cs->cr) != va.va_uid) {
   6898 		resp->status = check_open_access(args->share_access, cs, req);
   6899 		if (resp->status != NFS4_OK) {
   6900 			return;
   6901 		}
   6902 	}
   6903 
   6904 	/*
   6905 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
   6906 	 */
   6907 	cinfo->before = 0;
   6908 	cinfo->after = 0;
   6909 	cinfo->atomic = FALSE;
   6910 
   6911 	rfs4_do_open(cs, req, oo,
   6912 	    NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
   6913 	    args->share_access, args->share_deny, resp, 0);
   6914 }
   6915 
   6916 static void
   6917 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
   6918     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6919 {
   6920 	int error;
   6921 	nfsstat4 status;
   6922 	stateid4 stateid =
   6923 	    args->open_claim4_u.delegate_cur_info.delegate_stateid;
   6924 	rfs4_deleg_state_t *dsp;
   6925 
   6926 	/*
   6927 	 * Find the state info from the stateid and confirm that the
   6928 	 * file is delegated.  If the state openowner is the same as
   6929 	 * the supplied openowner we're done. If not, get the file
   6930 	 * info from the found state info. Use that file info to
   6931 	 * create the state for this lock owner. Note solaris doen't
   6932 	 * really need the pathname to find the file. We may want to
   6933 	 * lookup the pathname and make sure that the vp exist and
   6934 	 * matches the vp in the file structure. However it is
   6935 	 * possible that the pathname nolonger exists (local process
   6936 	 * unlinks the file), so this may not be that useful.
   6937 	 */
   6938 
   6939 	status = rfs4_get_deleg_state(&stateid, &dsp);
   6940 	if (status != NFS4_OK) {
   6941 		resp->status = status;
   6942 		return;
   6943 	}
   6944 
   6945 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
   6946 
   6947 	/*
   6948 	 * New lock owner, create state. Since this was probably called
   6949 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
   6950 	 */
   6951 
   6952 	ASSERT(cs->vp != NULL);
   6953 	VN_RELE(cs->vp);
   6954 	VN_HOLD(dsp->rds_finfo->rf_vp);
   6955 	cs->vp = dsp->rds_finfo->rf_vp;
   6956 
   6957 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
   6958 		rfs4_deleg_state_rele(dsp);
   6959 		*cs->statusp = resp->status = puterrno4(error);
   6960 		return;
   6961 	}
   6962 
   6963 	/* Mark progress for delegation returns */
   6964 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
   6965 	rfs4_deleg_state_rele(dsp);
   6966 	rfs4_do_open(cs, req, oo, DELEG_NONE,
   6967 	    args->share_access, args->share_deny, resp, 1);
   6968 }
   6969 
   6970 /*ARGSUSED*/
   6971 static void
   6972 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
   6973     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6974 {
   6975 	/*
   6976 	 * Lookup the pathname, it must already exist since this file
   6977 	 * was delegated.
   6978 	 *
   6979 	 * Find the file and state info for this vp and open owner pair.
   6980 	 *	check that they are in fact delegated.
   6981 	 *	check that the state access and deny modes are the same.
   6982 	 *
   6983 	 * Return the delgation possibly seting the recall flag.
   6984 	 */
   6985 	rfs4_file_t *fp;
   6986 	rfs4_state_t *sp;
   6987 	bool_t create = FALSE;
   6988 	bool_t dcreate = FALSE;
   6989 	rfs4_deleg_state_t *dsp;
   6990 	nfsace4 *ace;
   6991 
   6992 	/* Note we ignore oflags */
   6993 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
   6994 	    req, cs, args->share_access, &resp->cinfo);
   6995 
   6996 	if (resp->status != NFS4_OK) {
   6997 		return;
   6998 	}
   6999 
   7000 	/* get the file struct and hold a lock on it during initial open */
   7001 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
   7002 	if (fp == NULL) {
   7003 		resp->status = NFS4ERR_RESOURCE;
   7004 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
   7005 		return;
   7006 	}
   7007 
   7008 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
   7009 	if (sp == NULL) {
   7010 		resp->status = NFS4ERR_SERVERFAULT;
   7011 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
   7012 		rw_exit(&fp->rf_file_rwlock);
   7013 		rfs4_file_rele(fp);
   7014 		return;
   7015 	}
   7016 
   7017 	rfs4_dbe_lock(sp->rs_dbe);
   7018 	rfs4_dbe_lock(fp->rf_dbe);
   7019 	if (args->share_access != sp->rs_share_access ||
   7020 	    args->share_deny != sp->rs_share_deny ||
   7021 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
   7022 		NFS4_DEBUG(rfs4_debug,
   7023 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
   7024 		rfs4_dbe_unlock(fp->rf_dbe);
   7025 		rfs4_dbe_unlock(sp->rs_dbe);
   7026 		rfs4_file_rele(fp);
   7027 		rfs4_state_rele(sp);
   7028 		resp->status = NFS4ERR_SERVERFAULT;
   7029 		return;
   7030 	}
   7031 	rfs4_dbe_unlock(fp->rf_dbe);
   7032 	rfs4_dbe_unlock(sp->rs_dbe);
   7033 
   7034 	dsp = rfs4_finddeleg(sp, &dcreate);
   7035 	if (dsp == NULL) {
   7036 		rfs4_state_rele(sp);
   7037 		rfs4_file_rele(fp);
   7038 		resp->status = NFS4ERR_SERVERFAULT;
   7039 		return;
   7040 	}
   7041 
   7042 	next_stateid(&sp->rs_stateid);
   7043 
   7044 	resp->stateid = sp->rs_stateid.stateid;
   7045 
   7046 	resp->delegation.delegation_type = dsp->rds_dtype;
   7047 
   7048 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
   7049 		open_read_delegation4 *rv =
   7050 		    &resp->delegation.open_delegation4_u.read;
   7051 
   7052 		rv->stateid = dsp->rds_delegid.stateid;
   7053 		rv->recall = FALSE; /* no policy in place to set to TRUE */
   7054 		ace = &rv->permissions;
   7055 	} else {
   7056 		open_write_delegation4 *rv =
   7057 		    &resp->delegation.open_delegation4_u.write;
   7058 
   7059 		rv->stateid = dsp->rds_delegid.stateid;
   7060 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
   7061 		ace = &rv->permissions;
   7062 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
   7063 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
   7064 	}
   7065 
   7066 	/* XXX For now */
   7067 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
   7068 	ace->flag = 0;
   7069 	ace->access_mask = 0;
   7070 	ace->who.utf8string_len = 0;
   7071 	ace->who.utf8string_val = 0;
   7072 
   7073 	rfs4_deleg_state_rele(dsp);
   7074 	rfs4_state_rele(sp);
   7075 	rfs4_file_rele(fp);
   7076 }
   7077 
   7078 typedef enum {
   7079 	NFS4_CHKSEQ_OKAY = 0,
   7080 	NFS4_CHKSEQ_REPLAY = 1,
   7081 	NFS4_CHKSEQ_BAD = 2
   7082 } rfs4_chkseq_t;
   7083 
   7084 /*
   7085  * Generic function for sequence number checks.
   7086  */
   7087 static rfs4_chkseq_t
   7088 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
   7089     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
   7090 {
   7091 	/* Same sequence ids and matching operations? */
   7092 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
   7093 		if (copyres == TRUE) {
   7094 			rfs4_free_reply(resop);
   7095 			rfs4_copy_reply(resop, lastop);
   7096 		}
   7097 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
   7098 		    "Replayed SEQID %d\n", seqid));
   7099 		return (NFS4_CHKSEQ_REPLAY);
   7100 	}
   7101 
   7102 	/* If the incoming sequence is not the next expected then it is bad */
   7103 	if (rqst_seq != seqid + 1) {
   7104 		if (rqst_seq == seqid) {
   7105 			NFS4_DEBUG(rfs4_debug,
   7106 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
   7107 			    "but last op was %d current op is %d\n",
   7108 			    lastop->resop, resop->resop));
   7109 			return (NFS4_CHKSEQ_BAD);
   7110 		}
   7111 		NFS4_DEBUG(rfs4_debug,
   7112 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
   7113 		    rqst_seq, seqid));
   7114 		return (NFS4_CHKSEQ_BAD);
   7115 	}
   7116 
   7117 	/* Everything okay -- next expected */
   7118 	return (NFS4_CHKSEQ_OKAY);
   7119 }
   7120 
   7121 
   7122 static rfs4_chkseq_t
   7123 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
   7124 {
   7125 	rfs4_chkseq_t rc;
   7126 
   7127 	rfs4_dbe_lock(op->ro_dbe);
   7128 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
   7129 	    TRUE);
   7130 	rfs4_dbe_unlock(op->ro_dbe);
   7131 
   7132 	if (rc == NFS4_CHKSEQ_OKAY)
   7133 		rfs4_update_lease(op->ro_client);
   7134 
   7135 	return (rc);
   7136 }
   7137 
   7138 static rfs4_chkseq_t
   7139 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
   7140 {
   7141 	rfs4_chkseq_t rc;
   7142 
   7143 	rfs4_dbe_lock(op->ro_dbe);
   7144 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
   7145 	    olo_seqid, resop, FALSE);
   7146 	rfs4_dbe_unlock(op->ro_dbe);
   7147 
   7148 	return (rc);
   7149 }
   7150 
   7151 static rfs4_chkseq_t
   7152 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
   7153 {
   7154 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
   7155 
   7156 	rfs4_dbe_lock(lsp->rls_dbe);
   7157 	if (!lsp->rls_skip_seqid_check)
   7158 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
   7159 		    resop, TRUE);
   7160 	rfs4_dbe_unlock(lsp->rls_dbe);
   7161 
   7162 	return (rc);
   7163 }
   7164 
/*
 * OPEN operation.
 *
 * Outline:
 *   1. Require a current filehandle, find the client named by the
 *	open_owner's clientid and reject it if its lease has expired.
 *   2. Find or create the open owner and serialize on its ro_sw.
 *   3. For an owner that already existed (and is not in the
 *	"postpone confirm" state) check the request's seqid; a replay
 *	short-circuits to the bookkeeping at "out".
 *   4. Apply the grace-period rules, the mount-point check and the
 *	share_access/share_deny validation.
 *   5. Dispatch on the claim type to one of the rfs4_do_open*() helpers.
 *   6. At "out", depending on the final status, renew the lease, save
 *	the reply for replay detection and advance the owner's seqid.
 *
 * Labels:
 *   retry  - look the open owner up again after discarding an
 *	      unconfirmed one
 *   out    - client reference released; seqid/reply bookkeeping
 *   finish - publish resp->status to *cs->statusp and release the owner
 *   end    - only the "done" probe; used before an owner is held
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/*
		 * NOTE(review): no rfs4_client_rele() on this path;
		 * presumably rfs4_client_close() consumes the reference
		 * taken by rfs4_findclient_by_id() -- confirm.
		 */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Sampled once here; consulted by the CLAIM_PREVIOUS check below */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* Reset on every pass: the lookup is passed &create */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner used with a seqid beyond
			 * the recorded one: free its opens, drop it and
			 * look the owner up again.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/*
			 * rfs4_check_open_seqid() has already copied the
			 * saved reply into resop, so resp->status is the
			 * status of the original request.
			 */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper leaves its result in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* On failure nfs4_fhtovp() is handed &resp->status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Single place where the final status reaches the compound */
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
   7447 
   7448 /*ARGSUSED*/
   7449 void
   7450 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
   7451     struct svc_req *req, struct compound_state *cs)
   7452 {
   7453 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
   7454 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
   7455 	rfs4_state_t *sp;
   7456 	nfsstat4 status;
   7457 
   7458 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
   7459 	    OPEN_CONFIRM4args *, args);
   7460 
   7461 	if (cs->vp == NULL) {
   7462 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   7463 		goto out;
   7464 	}
   7465 
   7466 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
   7467 	if (status != NFS4_OK) {
   7468 		*cs->statusp = resp->status = status;
   7469 		goto out;
   7470 	}
   7471 
   7472 	/* Ensure specified filehandle matches */
   7473 	if (cs->vp != sp->rs_finfo->rf_vp) {
   7474 		rfs4_state_rele(sp);
   7475 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7476 		goto out;
   7477 	}
   7478 
   7479 	/* hold off other access to open_owner while we tinker */
   7480 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
   7481 
   7482 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
   7483 	case NFS4_CHECK_STATEID_OKAY:
   7484 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7485 		    resop) != 0) {
   7486 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7487 			break;
   7488 		}
   7489 		/*
   7490 		 * If it is the appropriate stateid and determined to
   7491 		 * be "OKAY" then this means that the stateid does not
   7492 		 * need to be confirmed and the client is in error for
   7493 		 * sending an OPEN_CONFIRM.
   7494 		 */
   7495 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7496 		break;
   7497 	case NFS4_CHECK_STATEID_OLD:
   7498 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7499 		break;
   7500 	case NFS4_CHECK_STATEID_BAD:
   7501 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7502 		break;
   7503 	case NFS4_CHECK_STATEID_EXPIRED:
   7504 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   7505 		break;
   7506 	case NFS4_CHECK_STATEID_CLOSED:
   7507 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7508 		break;
   7509 	case NFS4_CHECK_STATEID_REPLAY:
   7510 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7511 		    resop)) {
   7512 		case NFS4_CHKSEQ_OKAY:
   7513 			/*
   7514 			 * This is replayed stateid; if seqid matches
   7515 			 * next expected, then client is using wrong seqid.
   7516 			 */
   7517 			/* fall through */
   7518 		case NFS4_CHKSEQ_BAD:
   7519 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7520 			break;
   7521 		case NFS4_CHKSEQ_REPLAY:
   7522 			/*
   7523 			 * Note this case is the duplicate case so
   7524 			 * resp->status is already set.
   7525 			 */
   7526 			*cs->statusp = resp->status;
   7527 			rfs4_update_lease(sp->rs_owner->ro_client);
   7528 			break;
   7529 		}
   7530 		break;
   7531 	case NFS4_CHECK_STATEID_UNCONFIRMED:
   7532 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7533 		    resop) != NFS4_CHKSEQ_OKAY) {
   7534 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7535 			break;
   7536 		}
   7537 		*cs->statusp = resp->status = NFS4_OK;
   7538 
   7539 		next_stateid(&sp->rs_stateid);
   7540 		resp->open_stateid = sp->rs_stateid.stateid;
   7541 		sp->rs_owner->ro_need_confirm = FALSE;
   7542 		rfs4_update_lease(sp->rs_owner->ro_client);
   7543 		rfs4_update_open_sequence(sp->rs_owner);
   7544 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
   7545 		break;
   7546 	default:
   7547 		ASSERT(FALSE);
   7548 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   7549 		break;
   7550 	}
   7551 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
   7552 	rfs4_state_rele(sp);
   7553 
   7554 out:
   7555 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
   7556 	    OPEN_CONFIRM4res *, resp);
   7557 }
   7558 
   7559 /*ARGSUSED*/
   7560 void
   7561 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
   7562     struct svc_req *req, struct compound_state *cs)
   7563 {
   7564 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
   7565 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
   7566 	uint32_t access = args->share_access;
   7567 	uint32_t deny = args->share_deny;
   7568 	nfsstat4 status;
   7569 	rfs4_state_t *sp;
   7570 	rfs4_file_t *fp;
   7571 	int fflags = 0;
   7572 
   7573 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
   7574 	    OPEN_DOWNGRADE4args *, args);
   7575 
   7576 	if (cs->vp == NULL) {
   7577 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   7578 		goto out;
   7579 	}
   7580 
   7581 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
   7582 	if (status != NFS4_OK) {
   7583 		*cs->statusp = resp->status = status;
   7584 		goto out;
   7585 	}
   7586 
   7587 	/* Ensure specified filehandle matches */
   7588 	if (cs->vp != sp->rs_finfo->rf_vp) {
   7589 		rfs4_state_rele(sp);
   7590 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7591 		goto out;
   7592 	}
   7593 
   7594 	/* hold off other access to open_owner while we tinker */
   7595 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
   7596 
   7597 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
   7598 	case NFS4_CHECK_STATEID_OKAY:
   7599 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7600 		    resop) != NFS4_CHKSEQ_OKAY) {
   7601 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7602 			goto end;
   7603 		}
   7604 		break;
   7605 	case NFS4_CHECK_STATEID_OLD:
   7606 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7607 		goto end;
   7608 	case NFS4_CHECK_STATEID_BAD:
   7609 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7610 		goto end;
   7611 	case NFS4_CHECK_STATEID_EXPIRED:
   7612 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   7613 		goto end;
   7614 	case NFS4_CHECK_STATEID_CLOSED:
   7615 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7616 		goto end;
   7617 	case NFS4_CHECK_STATEID_UNCONFIRMED:
   7618 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7619 		goto end;
   7620 	case NFS4_CHECK_STATEID_REPLAY:
   7621 		/* Check the sequence id for the open owner */
   7622 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7623 		    resop)) {
   7624 		case NFS4_CHKSEQ_OKAY:
   7625 			/*
   7626 			 * This is replayed stateid; if seqid matches
   7627 			 * next expected, then client is using wrong seqid.
   7628 			 */
   7629 			/* fall through */
   7630 		case NFS4_CHKSEQ_BAD:
   7631 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7632 			goto end;
   7633 		case NFS4_CHKSEQ_REPLAY:
   7634 			/*
   7635 			 * Note this case is the duplicate case so
   7636 			 * resp->status is already set.
   7637 			 */
   7638 			*cs->statusp = resp->status;
   7639 			rfs4_update_lease(sp->rs_owner->ro_client);
   7640 			goto end;
   7641 		}
   7642 		break;
   7643 	default:
   7644 		ASSERT(FALSE);
   7645 		break;
   7646 	}
   7647 
   7648 	rfs4_dbe_lock(sp->rs_dbe);
   7649 	/*
   7650 	 * Check that the new access modes and deny modes are valid.
   7651 	 * Check that no invalid bits are set.
   7652 	 */
   7653 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
   7654 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
   7655 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   7656 		rfs4_update_open_sequence(sp->rs_owner);
   7657 		rfs4_dbe_unlock(sp->rs_dbe);
   7658 		goto end;
   7659 	}
   7660 
   7661 	/*
   7662 	 * The new modes must be a subset of the current modes and
   7663 	 * the access must specify at least one mode. To test that
   7664 	 * the new mode is a subset of the current modes we bitwise
   7665 	 * AND them together and check that the result equals the new
   7666 	 * mode. For example:
   7667 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
   7668 	 * access & sp->rs_open_access == R == access, so the new access mode
   7669 	 * is valid. Consider access == RW, sp->rs_open_access = R
   7670 	 * access & sp->rs_open_access == R != access, so the new access mode
   7671 	 * is invalid.
   7672 	 */
   7673 	if ((access & sp->rs_open_access) != access ||
   7674 	    (deny & sp->rs_open_deny) != deny ||
   7675 	    (access &
   7676 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
   7677 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   7678 		rfs4_update_open_sequence(sp->rs_owner);
   7679 		rfs4_dbe_unlock(sp->rs_dbe);
   7680 		goto end;
   7681 	}
   7682 
   7683 	/*
   7684 	 * Release any share locks associated with this stateID.
   7685 	 * Strictly speaking, this violates the spec because the
   7686 	 * spec effectively requires that open downgrade be atomic.
   7687 	 * At present, fs_shrlock does not have this capability.
   7688 	 */
   7689 	(void) rfs4_unshare(sp);
   7690 
   7691 	status = rfs4_share(sp, access, deny);
   7692 	if (status != NFS4_OK) {
   7693 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   7694 		rfs4_update_open_sequence(sp->rs_owner);
   7695 		rfs4_dbe_unlock(sp<