Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 #include <sys/param.h>
     32 #include <sys/types.h>
     33 #include <sys/systm.h>
     34 #include <sys/cred.h>
     35 #include <sys/buf.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vfs_opreg.h>
     38 #include <sys/vnode.h>
     39 #include <sys/uio.h>
     40 #include <sys/errno.h>
     41 #include <sys/sysmacros.h>
     42 #include <sys/statvfs.h>
     43 #include <sys/kmem.h>
     44 #include <sys/dirent.h>
     45 #include <sys/cmn_err.h>
     46 #include <sys/debug.h>
     47 #include <sys/systeminfo.h>
     48 #include <sys/flock.h>
     49 #include <sys/pathname.h>
     50 #include <sys/nbmlock.h>
     51 #include <sys/share.h>
     52 #include <sys/atomic.h>
     53 #include <sys/policy.h>
     54 #include <sys/fem.h>
     55 #include <sys/sdt.h>
     56 #include <sys/ddi.h>
     57 #include <sys/zone.h>
     58 
     59 #include <fs/fs_reparse.h>
     60 
     61 #include <rpc/types.h>
     62 #include <rpc/auth.h>
     63 #include <rpc/rpcsec_gss.h>
     64 #include <rpc/svc.h>
     65 
     66 #include <nfs/nfs.h>
     67 #include <nfs/export.h>
     68 #include <nfs/nfs_cmd.h>
     69 #include <nfs/lm.h>
     70 #include <nfs/nfs4.h>
     71 
     72 #include <sys/strsubr.h>
     73 #include <sys/strsun.h>
     74 
     75 #include <inet/common.h>
     76 #include <inet/ip.h>
     77 #include <inet/ip6.h>
     78 
     79 #include <sys/tsol/label.h>
     80 #include <sys/tsol/tndb.h>
     81 
     82 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
     83 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
     84 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
     85 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
     86 extern struct svc_ops rdma_svc_ops;
     87 extern int nfs_loaned_buffers;
     88 /* End of Tunables */
     89 
     90 static int rdma_setup_read_data4(READ4args *, READ4res *);
     91 
     92 /*
     93  * Used to bump the stateid4.seqid value and show changes in the stateid
     94  */
     95 #define	next_stateid(sp) (++(sp)->bits.chgseq)
     96 
     97 /*
     98  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
     99  *	This is used to return NFS4ERR_TOOSMALL when clients specify
    100  *	maxcount that isn't large enough to hold the smallest possible
    101  *	XDR encoded dirent.
    102  *
    103  *	    sizeof cookie (8 bytes) +
    104  *	    sizeof name_len (4 bytes) +
    105  *	    sizeof smallest (padded) name (4 bytes) +
    106  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
    107  *	    sizeof attrlist4_len (4 bytes) +
    108  *	    sizeof next boolean (4 bytes)
    109  *
    110  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
    111  * the smallest possible entry4 (assumes no attrs requested).
    112  *	sizeof nfsstat4 (4 bytes) +
    113  *	sizeof verifier4 (8 bytes) +
    114  *	sizeof entry4list bool (4 bytes) +
    115  *	sizeof entry4 	(36 bytes) +
    116  *	sizeof eof bool  (4 bytes)
    117  *
    118  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
    119  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
    120  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
    121  *	required for a given name length.  MAXNAMELEN is the maximum
    122  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
    123  *	macros are to allow for . and .. entries -- just a minor tweak to try
    124  *	and guarantee that buffer we give to VOP_READDIR will be large enough
    125  *	to hold ., .., and the largest possible solaris dirent64.
    126  */
    127 #define	RFS4_MINLEN_ENTRY4 36
    128 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
    129 #define	RFS4_MINLEN_RDDIR_BUF \
    130 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
    131 
    132 /*
    133  * It would be better to pad to 4 bytes since that's what XDR would do,
    134  * but the dirents UFS gives us are already padded to 8, so just take
    135  * what we're given.  Dircount is only a hint anyway.  Currently the
    136  * solaris kernel is ASCII only, so there's no point in calling the
    137  * UTF8 functions.
    138  *
 * dirent64: name padded to provide 8 byte struct alignment
    140  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
    141  *
    142  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
    143  *
    144  */
    145 #define	DIRENT64_TO_DIRCOUNT(dp) \
    146 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
    147 
    148 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
    149 
    150 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
    151 
    152 u_longlong_t	nfs4_srv_caller_id;
    153 uint_t		nfs4_srv_vkey = 0;
    154 
    155 verifier4	Write4verf;
    156 verifier4	Readdir4verf;
    157 
    158 void	rfs4_init_compound_state(struct compound_state *);
    159 
    160 static void	nullfree(caddr_t);
    161 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    162 			struct compound_state *);
    163 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    164 			struct compound_state *);
    165 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    166 			struct compound_state *);
    167 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    168 			struct compound_state *);
    169 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    170 			struct compound_state *);
    171 static void	rfs4_op_create_free(nfs_resop4 *resop);
    172 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    173 			struct svc_req *, struct compound_state *);
    174 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
    175 			struct svc_req *, struct compound_state *);
    176 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    177 			struct compound_state *);
    178 static void	rfs4_op_getattr_free(nfs_resop4 *);
    179 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    180 			struct compound_state *);
    181 static void	rfs4_op_getfh_free(nfs_resop4 *);
    182 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    183 			struct compound_state *);
    184 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    185 			struct compound_state *);
    186 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    187 			struct compound_state *);
    188 static void	lock_denied_free(nfs_resop4 *);
    189 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    190 			struct compound_state *);
    191 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    192 			struct compound_state *);
    193 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    194 			struct compound_state *);
    195 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    196 			struct compound_state *);
    197 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    198 				struct svc_req *req, struct compound_state *cs);
    199 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    200 			struct compound_state *);
    201 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    202 			struct compound_state *);
    203 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
    204 			struct svc_req *, struct compound_state *);
    205 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    206 			struct svc_req *, struct compound_state *);
    207 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    208 			struct compound_state *);
    209 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    210 			struct compound_state *);
    211 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    212 			struct compound_state *);
    213 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    214 			struct compound_state *);
    215 static void	rfs4_op_read_free(nfs_resop4 *);
    216 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
    217 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    218 			struct compound_state *);
    219 static void	rfs4_op_readlink_free(nfs_resop4 *);
    220 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    221 			struct svc_req *, struct compound_state *);
    222 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    223 			struct compound_state *);
    224 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    225 			struct compound_state *);
    226 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    227 			struct compound_state *);
    228 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    229 			struct compound_state *);
    230 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    231 			struct compound_state *);
    232 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    233 			struct compound_state *);
    234 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    235 			struct compound_state *);
    236 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    237 			struct compound_state *);
    238 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
    239 			struct svc_req *, struct compound_state *);
    240 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
    241 			struct svc_req *req, struct compound_state *);
    242 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    243 			struct compound_state *);
    244 static void	rfs4_op_secinfo_free(nfs_resop4 *);
    245 
    246 static nfsstat4 check_open_access(uint32_t,
    247 				struct compound_state *, struct svc_req *);
    248 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
    249 void rfs4_ss_clid(rfs4_client_t *);
    250 
/*
 * translation table for attrs
 *
 * Instances are set up and torn down with nfs4_ntov_table_init() and
 * nfs4_ntov_table_free(), and handed to do_rfs4_set_attrs() (see the
 * prototypes that follow this definition).
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only; confirm against the users of this structure.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably per-attr values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one byte per possible attr */
	int attrcnt;			/* presumably # of entries in use */
	bool_t vfsstat;			/* presumably vfs stat info needed */
};
    260 
    261 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
    262 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
    263 				    struct nfs4_svgetit_arg *sargp);
    264 
    265 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
    266 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
    267 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
    268 
    269 fem_t		*deleg_rdops;
    270 fem_t		*deleg_wrops;
    271 
    272 rfs4_servinst_t *rfs4_cur_servinst = NULL;	/* current server instance */
    273 kmutex_t	rfs4_servinst_lock;	/* protects linked list */
    274 int		rfs4_seen_first_compound;	/* set first time we see one */
    275 
    276 /*
    277  * NFS4 op dispatch table
    278  */
    279 
/*
 * One entry of the NFSv4 operation dispatch table.
 *
 * Both function pointers are deliberately declared without a parameter
 * list: the table below stores handlers with differing signatures in the
 * same slot (e.g. nullfree(caddr_t) next to rfs4_op_getattr_free(
 * nfs_resop4 *) in dis_resfree).
 */
struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
    285 
/*
 * Dispatch table for NFSv4 operations.  The position of an entry is the
 * operation number noted in the comment above it, so entries must not be
 * reordered.  Slots 0 through 2 have no operation assigned and are routed
 * to rfs4_op_illegal.  Operations whose results need no cleanup use
 * nullfree as their dis_resfree routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
    411 
    412 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
    413 
    414 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
    415 
    416 #ifdef DEBUG
    417 
    418 int		rfs4_fillone_debug = 0;
    419 int		rfs4_no_stub_access = 1;
    420 int		rfs4_rddir_debug = 0;
    421 
    422 static char    *rfs4_op_string[] = {
    423 	"rfs4_op_null",
    424 	"rfs4_op_1 unused",
    425 	"rfs4_op_2 unused",
    426 	"rfs4_op_access",
    427 	"rfs4_op_close",
    428 	"rfs4_op_commit",
    429 	"rfs4_op_create",
    430 	"rfs4_op_delegpurge",
    431 	"rfs4_op_delegreturn",
    432 	"rfs4_op_getattr",
    433 	"rfs4_op_getfh",
    434 	"rfs4_op_link",
    435 	"rfs4_op_lock",
    436 	"rfs4_op_lockt",
    437 	"rfs4_op_locku",
    438 	"rfs4_op_lookup",
    439 	"rfs4_op_lookupp",
    440 	"rfs4_op_nverify",
    441 	"rfs4_op_open",
    442 	"rfs4_op_openattr",
    443 	"rfs4_op_open_confirm",
    444 	"rfs4_op_open_downgrade",
    445 	"rfs4_op_putfh",
    446 	"rfs4_op_putpubfh",
    447 	"rfs4_op_putrootfh",
    448 	"rfs4_op_read",
    449 	"rfs4_op_readdir",
    450 	"rfs4_op_readlink",
    451 	"rfs4_op_remove",
    452 	"rfs4_op_rename",
    453 	"rfs4_op_renew",
    454 	"rfs4_op_restorefh",
    455 	"rfs4_op_savefh",
    456 	"rfs4_op_secinfo",
    457 	"rfs4_op_setattr",
    458 	"rfs4_op_setclientid",
    459 	"rfs4_op_setclient_confirm",
    460 	"rfs4_op_verify",
    461 	"rfs4_op_write",
    462 	"rfs4_op_release_lockowner",
    463 	"rfs4_op_illegal"
    464 };
    465 #endif
    466 
    467 void	rfs4_ss_chkclid(rfs4_client_t *);
    468 
    469 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
    470 
    471 extern void	rfs4_free_fs_locations4(fs_locations4 *);
    472 
    473 #ifdef	nextdp
    474 #undef nextdp
    475 #endif
    476 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
    477 
/*
 * FEM operation template for read delegations: pairs each monitored vnode
 * operation name with its deleg_rd_* handler.  rfs4_srvrinit() passes this
 * to fem_create() to build deleg_rdops.  The NULL, NULL pair ends the list.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM operation template for write delegations: pairs each monitored vnode
 * operation name with its deleg_wr_* handler.  Unlike the read template it
 * also intercepts VOPNAME_READ.  rfs4_srvrinit() passes this to
 * fem_create() to build deleg_wrops.  The NULL, NULL pair ends the list.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};

    499 
    500 int
    501 rfs4_srvrinit(void)
    502 {
    503 	timespec32_t verf;
    504 	int error;
    505 	extern void rfs4_attr_init();
    506 	extern krwlock_t rfs4_deleg_policy_lock;
    507 
    508 	/*
    509 	 * The following algorithm attempts to find a unique verifier
    510 	 * to be used as the write verifier returned from the server
    511 	 * to the client.  It is important that this verifier change
    512 	 * whenever the server reboots.  Of secondary importance, it
    513 	 * is important for the verifier to be unique between two
    514 	 * different servers.
    515 	 *
    516 	 * Thus, an attempt is made to use the system hostid and the
    517 	 * current time in seconds when the nfssrv kernel module is
    518 	 * loaded.  It is assumed that an NFS server will not be able
    519 	 * to boot and then to reboot in less than a second.  If the
    520 	 * hostid has not been set, then the current high resolution
    521 	 * time is used.  This will ensure different verifiers each
    522 	 * time the server reboots and minimize the chances that two
    523 	 * different servers will have the same verifier.
    524 	 * XXX - this is broken on LP64 kernels.
    525 	 */
    526 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
    527 	if (verf.tv_sec != 0) {
    528 		verf.tv_nsec = gethrestime_sec();
    529 	} else {
    530 		timespec_t tverf;
    531 
    532 		gethrestime(&tverf);
    533 		verf.tv_sec = (time_t)tverf.tv_sec;
    534 		verf.tv_nsec = tverf.tv_nsec;
    535 	}
    536 
    537 	Write4verf = *(uint64_t *)&verf;
    538 
    539 	rfs4_attr_init();
    540 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
    541 
    542 	/* Used to manage create/destroy of server state */
    543 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
    544 
    545 	/* Used to manage access to server instance linked list */
    546 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
    547 
    548 	/* Used to manage access to rfs4_deleg_policy */
    549 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
    550 
    551 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
    552 	if (error != 0) {
    553 		rfs4_disable_delegation();
    554 	} else {
    555 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
    556 		    &deleg_wrops);
    557 		if (error != 0) {
    558 			rfs4_disable_delegation();
    559 			fem_free(deleg_rdops);
    560 		}
    561 	}
    562 
    563 	nfs4_srv_caller_id = fs_new_caller_id();
    564 
    565 	lockt_sysid = lm_alloc_sysidt();
    566 
    567 	vsd_create(&nfs4_srv_vkey, NULL);
    568 
    569 	return (0);
    570 }
    571 
    572 void
    573 rfs4_srvrfini(void)
    574 {
    575 	extern krwlock_t rfs4_deleg_policy_lock;
    576 
    577 	if (lockt_sysid != LM_NOSYSID) {
    578 		lm_free_sysidt(lockt_sysid);
    579 		lockt_sysid = LM_NOSYSID;
    580 	}
    581 
    582 	mutex_destroy(&rfs4_deleg_lock);
    583 	mutex_destroy(&rfs4_state_lock);
    584 	rw_destroy(&rfs4_deleg_policy_lock);
    585 
    586 	fem_free(deleg_rdops);
    587 	fem_free(deleg_wrops);
    588 }
    589 
    590 void
    591 rfs4_init_compound_state(struct compound_state *cs)
    592 {
    593 	bzero(cs, sizeof (*cs));
    594 	cs->cont = TRUE;
    595 	cs->access = CS_ACCESS_DENIED;
    596 	cs->deleg = FALSE;
    597 	cs->mandlock = FALSE;
    598 	cs->fh.nfs_fh4_val = cs->fhbuf;
    599 }
    600 
    601 void
    602 rfs4_grace_start(rfs4_servinst_t *sip)
    603 {
    604 	rw_enter(&sip->rwlock, RW_WRITER);
    605 	sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
    606 	sip->grace_period = rfs4_grace_period;
    607 	rw_exit(&sip->rwlock);
    608 }
    609 
    610 /*
    611  * returns true if the instance's grace period has never been started
    612  */
    613 int
    614 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
    615 {
    616 	time_t start_time;
    617 
    618 	rw_enter(&sip->rwlock, RW_READER);
    619 	start_time = sip->start_time;
    620 	rw_exit(&sip->rwlock);
    621 
    622 	return (start_time == 0);
    623 }
    624 
    625 /*
    626  * Indicates if server instance is within the
    627  * grace period.
    628  */
    629 int
    630 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
    631 {
    632 	time_t grace_expiry;
    633 
    634 	rw_enter(&sip->rwlock, RW_READER);
    635 	grace_expiry = sip->start_time + sip->grace_period;
    636 	rw_exit(&sip->rwlock);
    637 
    638 	return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
    639 }
    640 
    641 int
    642 rfs4_clnt_in_grace(rfs4_client_t *cp)
    643 {
    644 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    645 
    646 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
    647 }
    648 
    649 /*
    650  * reset all currently active grace periods
    651  */
    652 void
    653 rfs4_grace_reset_all(void)
    654 {
    655 	rfs4_servinst_t *sip;
    656 
    657 	mutex_enter(&rfs4_servinst_lock);
    658 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
    659 		if (rfs4_servinst_in_grace(sip))
    660 			rfs4_grace_start(sip);
    661 	mutex_exit(&rfs4_servinst_lock);
    662 }
    663 
    664 /*
    665  * start any new instances' grace periods
    666  */
    667 void
    668 rfs4_grace_start_new(void)
    669 {
    670 	rfs4_servinst_t *sip;
    671 
    672 	mutex_enter(&rfs4_servinst_lock);
    673 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
    674 		if (rfs4_servinst_grace_new(sip))
    675 			rfs4_grace_start(sip);
    676 	mutex_exit(&rfs4_servinst_lock);
    677 }
    678 
    679 static rfs4_dss_path_t *
    680 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
    681 {
    682 	size_t len;
    683 	rfs4_dss_path_t *dss_path;
    684 
    685 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
    686 
    687 	/*
    688 	 * Take a copy of the string, since the original may be overwritten.
    689 	 * Sadly, no strdup() in the kernel.
    690 	 */
    691 	/* allow for NUL */
    692 	len = strlen(path) + 1;
    693 	dss_path->path = kmem_alloc(len, KM_SLEEP);
    694 	(void) strlcpy(dss_path->path, path, len);
    695 
    696 	/* associate with servinst */
    697 	dss_path->sip = sip;
    698 	dss_path->index = index;
    699 
    700 	/*
    701 	 * Add to list of served paths.
    702 	 * No locking required, as we're only ever called at startup.
    703 	 */
    704 	if (rfs4_dss_pathlist == NULL) {
    705 		/* this is the first dss_path_t */
    706 
    707 		/* needed for insque/remque */
    708 		dss_path->next = dss_path->prev = dss_path;
    709 
    710 		rfs4_dss_pathlist = dss_path;
    711 	} else {
    712 		insque(dss_path, rfs4_dss_pathlist);
    713 	}
    714 
    715 	return (dss_path);
    716 }
    717 
    718 /*
    719  * Create a new server instance, and make it the currently active instance.
    720  * Note that starting the grace period too early will reduce the clients'
    721  * recovery window.
    722  */
    723 void
    724 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
    725 {
    726 	unsigned i;
    727 	rfs4_servinst_t *sip;
    728 	rfs4_oldstate_t *oldstate;
    729 
    730 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
    731 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
    732 
    733 	sip->start_time = (time_t)0;
    734 	sip->grace_period = (time_t)0;
    735 	sip->next = NULL;
    736 	sip->prev = NULL;
    737 
    738 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
    739 	/*
    740 	 * This initial dummy entry is required to setup for insque/remque.
    741 	 * It must be skipped over whenever the list is traversed.
    742 	 */
    743 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
    744 	/* insque/remque require initial list entry to be self-terminated */
    745 	oldstate->next = oldstate;
    746 	oldstate->prev = oldstate;
    747 	sip->oldstate = oldstate;
    748 
    749 
    750 	sip->dss_npaths = dss_npaths;
    751 	sip->dss_paths = kmem_alloc(dss_npaths *
    752 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
    753 
    754 	for (i = 0; i < dss_npaths; i++) {
    755 		sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
    756 	}
    757 
    758 	mutex_enter(&rfs4_servinst_lock);
    759 	if (rfs4_cur_servinst != NULL) {
    760 		/* add to linked list */
    761 		sip->prev = rfs4_cur_servinst;
    762 		rfs4_cur_servinst->next = sip;
    763 	}
    764 	if (start_grace)
    765 		rfs4_grace_start(sip);
    766 	/* make the new instance "current" */
    767 	rfs4_cur_servinst = sip;
    768 
    769 	mutex_exit(&rfs4_servinst_lock);
    770 }
    771 
    772 /*
    773  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
    774  * all instances directly.
    775  */
    776 void
    777 rfs4_servinst_destroy_all(void)
    778 {
    779 	rfs4_servinst_t *sip, *prev, *current;
    780 #ifdef DEBUG
    781 	int n = 0;
    782 #endif
    783 
    784 	mutex_enter(&rfs4_servinst_lock);
    785 	ASSERT(rfs4_cur_servinst != NULL);
    786 	current = rfs4_cur_servinst;
    787 	rfs4_cur_servinst = NULL;
    788 	for (sip = current; sip != NULL; sip = prev) {
    789 		prev = sip->prev;
    790 		rw_destroy(&sip->rwlock);
    791 		if (sip->oldstate)
    792 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
    793 		if (sip->dss_paths)
    794 			kmem_free(sip->dss_paths,
    795 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
    796 		kmem_free(sip, sizeof (rfs4_servinst_t));
    797 #ifdef DEBUG
    798 		n++;
    799 #endif
    800 	}
    801 	mutex_exit(&rfs4_servinst_lock);
    802 }
    803 
    804 /*
    805  * Assign the current server instance to a client_t.
    806  * Should be called with cp->rc_dbe held.
    807  */
    808 void
    809 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
    810 {
    811 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    812 
    813 	/*
    814 	 * The lock ensures that if the current instance is in the process
    815 	 * of changing, we will see the new one.
    816 	 */
    817 	mutex_enter(&rfs4_servinst_lock);
    818 	cp->rc_server_instance = sip;
    819 	mutex_exit(&rfs4_servinst_lock);
    820 }
    821 
    822 rfs4_servinst_t *
    823 rfs4_servinst(rfs4_client_t *cp)
    824 {
    825 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
    826 
    827 	return (cp->rc_server_instance);
    828 }
    829 
/*
 * Result-free routine that does nothing.  Used in the dispatch table as
 * the dis_resfree entry for operations whose results need no cleanup.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
    835 
    836 /*
    837  * This is a fall-through for invalid or not implemented (yet) ops
    838  */
    839 /* ARGSUSED */
    840 static void
    841 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    842 	struct compound_state *cs)
    843 {
    844 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
    845 }
    846 
    847 /*
    848  * Check if the security flavor, nfsnum, is in the flavor_list.
    849  */
    850 bool_t
    851 in_flavor_list(int nfsnum, int *flavor_list, int count)
    852 {
    853 	int i;
    854 
    855 	for (i = 0; i < count; i++) {
    856 		if (nfsnum == flavor_list[i])
    857 			return (TRUE);
    858 	}
    859 	return (FALSE);
    860 }
    861 
    862 /*
    863  * Used by rfs4_op_secinfo to get the security information from the
    864  * export structure associated with the component.
    865  */
    866 /* ARGSUSED */
    867 static nfsstat4
    868 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
    869 {
    870 	int error, different_export = 0;
    871 	vnode_t *dvp, *vp, *tvp;
    872 	struct exportinfo *exi = NULL;
    873 	fid_t fid;
    874 	uint_t count, i;
    875 	secinfo4 *resok_val;
    876 	struct secinfo *secp;
    877 	seconfig_t *si;
    878 	bool_t did_traverse;
    879 	int dotdot, walk;
    880 
    881 	dvp = cs->vp;
    882 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
    883 
    884 	/*
    885 	 * If dotdotting, then need to check whether it's above the
    886 	 * root of a filesystem, or above an export point.
    887 	 */
    888 	if (dotdot) {
    889 
    890 		/*
    891 		 * If dotdotting at the root of a filesystem, then
    892 		 * need to traverse back to the mounted-on filesystem
    893 		 * and do the dotdot lookup there.
    894 		 */
    895 		if (cs->vp->v_flag & VROOT) {
    896 
    897 			/*
    898 			 * If at the system root, then can
    899 			 * go up no further.
    900 			 */
    901 			if (VN_CMP(dvp, rootdir))
    902 				return (puterrno4(ENOENT));
    903 
    904 			/*
    905 			 * Traverse back to the mounted-on filesystem
    906 			 */
    907 			dvp = untraverse(cs->vp);
    908 
    909 			/*
    910 			 * Set the different_export flag so we remember
    911 			 * to pick up a new exportinfo entry for
    912 			 * this new filesystem.
    913 			 */
    914 			different_export = 1;
    915 		} else {
    916 
    917 			/*
    918 			 * If dotdotting above an export point then set
    919 			 * the different_export to get new export info.
    920 			 */
    921 			different_export = nfs_exported(cs->exi, cs->vp);
    922 		}
    923 	}
    924 
    925 	/*
    926 	 * Get the vnode for the component "nm".
    927 	 */
    928 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
    929 	    NULL, NULL, NULL);
    930 	if (error)
    931 		return (puterrno4(error));
    932 
    933 	/*
    934 	 * If the vnode is in a pseudo filesystem, or if the security flavor
    935 	 * used in the request is valid but not an explicitly shared flavor,
    936 	 * or the access bit indicates that this is a limited access,
    937 	 * check whether this vnode is visible.
    938 	 */
    939 	if (!different_export &&
    940 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
    941 	    cs->access & CS_ACCESS_LIMITED)) {
    942 		if (! nfs_visible(cs->exi, vp, &different_export)) {
    943 			VN_RELE(vp);
    944 			return (puterrno4(ENOENT));
    945 		}
    946 	}
    947 
    948 	/*
    949 	 * If it's a mountpoint, then traverse it.
    950 	 */
    951 	if (vn_ismntpt(vp)) {
    952 		tvp = vp;
    953 		if ((error = traverse(&tvp)) != 0) {
    954 			VN_RELE(vp);
    955 			return (puterrno4(error));
    956 		}
    957 		/* remember that we had to traverse mountpoint */
    958 		did_traverse = TRUE;
    959 		vp = tvp;
    960 		different_export = 1;
    961 	} else if (vp->v_vfsp != dvp->v_vfsp) {
    962 		/*
    963 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
    964 		 * then vp is probably an LOFS object.  We don't need the
    965 		 * realvp, we just need to know that we might have crossed
    966 		 * a server fs boundary and need to call checkexport4.
    967 		 * (LOFS lookup hides server fs mountpoints, and actually calls
    968 		 * traverse)
    969 		 */
    970 		different_export = 1;
    971 		did_traverse = FALSE;
    972 	}
    973 
    974 	/*
    975 	 * Get the export information for it.
    976 	 */
    977 	if (different_export) {
    978 
    979 		bzero(&fid, sizeof (fid));
    980 		fid.fid_len = MAXFIDSZ;
    981 		error = vop_fid_pseudo(vp, &fid);
    982 		if (error) {
    983 			VN_RELE(vp);
    984 			return (puterrno4(error));
    985 		}
    986 
    987 		if (dotdot)
    988 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
    989 		else
    990 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
    991 
    992 		if (exi == NULL) {
    993 			if (did_traverse == TRUE) {
    994 				/*
    995 				 * If this vnode is a mounted-on vnode,
    996 				 * but the mounted-on file system is not
    997 				 * exported, send back the secinfo for
    998 				 * the exported node that the mounted-on
    999 				 * vnode lives in.
   1000 				 */
   1001 				exi = cs->exi;
   1002 			} else {
   1003 				VN_RELE(vp);
   1004 				return (puterrno4(EACCES));
   1005 			}
   1006 		}
   1007 	} else {
   1008 		exi = cs->exi;
   1009 	}
   1010 	ASSERT(exi != NULL);
   1011 
   1012 
   1013 	/*
   1014 	 * Create the secinfo result based on the security information
   1015 	 * from the exportinfo structure (exi).
   1016 	 *
   1017 	 * Return all flavors for a pseudo node.
   1018 	 * For a real export node, return the flavor that the client
   1019 	 * has access with.
   1020 	 */
   1021 	ASSERT(RW_LOCK_HELD(&exported_lock));
   1022 	if (PSEUDO(exi)) {
   1023 		count = exi->exi_export.ex_seccnt; /* total sec count */
   1024 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
   1025 		secp = exi->exi_export.ex_secinfo;
   1026 
   1027 		for (i = 0; i < count; i++) {
   1028 			si = &secp[i].s_secinfo;
   1029 			resok_val[i].flavor = si->sc_rpcnum;
   1030 			if (resok_val[i].flavor == RPCSEC_GSS) {
   1031 				rpcsec_gss_info *info;
   1032 
   1033 				info = &resok_val[i].flavor_info;
   1034 				info->qop = si->sc_qop;
   1035 				info->service = (rpc_gss_svc_t)si->sc_service;
   1036 
   1037 				/* get oid opaque data */
   1038 				info->oid.sec_oid4_len =
   1039 				    si->sc_gss_mech_type->length;
   1040 				info->oid.sec_oid4_val = kmem_alloc(
   1041 				    si->sc_gss_mech_type->length, KM_SLEEP);
   1042 				bcopy(
   1043 				    si->sc_gss_mech_type->elements,
   1044 				    info->oid.sec_oid4_val,
   1045 				    info->oid.sec_oid4_len);
   1046 			}
   1047 		}
   1048 		resp->SECINFO4resok_len = count;
   1049 		resp->SECINFO4resok_val = resok_val;
   1050 	} else {
   1051 		int ret_cnt = 0, k = 0;
   1052 		int *flavor_list;
   1053 
   1054 		count = exi->exi_export.ex_seccnt; /* total sec count */
   1055 		secp = exi->exi_export.ex_secinfo;
   1056 
   1057 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
   1058 		/* find out which flavors to return */
   1059 		for (i = 0; i < count; i ++) {
   1060 			int access, flavor, perm;
   1061 
   1062 			flavor = secp[i].s_secinfo.sc_nfsnum;
   1063 			perm = secp[i].s_flags;
   1064 
   1065 			access = nfsauth4_secinfo_access(exi, cs->req,
   1066 			    flavor, perm);
   1067 
   1068 			if (! (access & NFSAUTH_DENIED) &&
   1069 			    ! (access & NFSAUTH_WRONGSEC)) {
   1070 				flavor_list[ret_cnt] = flavor;
   1071 				ret_cnt++;
   1072 			}
   1073 		}
   1074 
   1075 		/* Create the returning SECINFO value */
   1076 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
   1077 
   1078 		for (i = 0; i < count; i++) {
   1079 			/*
   1080 			 * If the flavor is in the flavor list,
   1081 			 * fill in resok_val.
   1082 			 */
   1083 			si = &secp[i].s_secinfo;
   1084 			if (in_flavor_list(si->sc_nfsnum,
   1085 			    flavor_list, ret_cnt)) {
   1086 				resok_val[k].flavor = si->sc_rpcnum;
   1087 				if (resok_val[k].flavor == RPCSEC_GSS) {
   1088 					rpcsec_gss_info *info;
   1089 
   1090 					info = &resok_val[k].flavor_info;
   1091 					info->qop = si->sc_qop;
   1092 					info->service = (rpc_gss_svc_t)
   1093 					    si->sc_service;
   1094 
   1095 					/* get oid opaque data */
   1096 					info->oid.sec_oid4_len =
   1097 					    si->sc_gss_mech_type->length;
   1098 					info->oid.sec_oid4_val = kmem_alloc(
   1099 					    si->sc_gss_mech_type->length,
   1100 					    KM_SLEEP);
   1101 					bcopy(si->sc_gss_mech_type->elements,
   1102 					    info->oid.sec_oid4_val,
   1103 					    info->oid.sec_oid4_len);
   1104 				}
   1105 				k++;
   1106 			}
   1107 			if (k >= ret_cnt)
   1108 				break;
   1109 		}
   1110 		resp->SECINFO4resok_len = ret_cnt;
   1111 		resp->SECINFO4resok_val = resok_val;
   1112 		kmem_free(flavor_list, count * sizeof (int));
   1113 	}
   1114 
   1115 	VN_RELE(vp);
   1116 	return (NFS4_OK);
   1117 }
   1118 
   1119 /*
   1120  * SECINFO (Operation 33): Obtain required security information on
   1121  * the component name in the format of (security-mechanism-oid, qop, service)
   1122  * triplets.
   1123  */
   1124 /* ARGSUSED */
   1125 static void
   1126 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1127     struct compound_state *cs)
   1128 {
   1129 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
   1130 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
   1131 	utf8string *utfnm = &args->name;
   1132 	uint_t len;
   1133 	char *nm;
   1134 	struct sockaddr *ca;
   1135 	char *name = NULL;
   1136 
   1137 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
   1138 	    SECINFO4args *, args);
   1139 
   1140 	/*
   1141 	 * Current file handle (cfh) should have been set before getting
   1142 	 * into this function. If not, return error.
   1143 	 */
   1144 	if (cs->vp == NULL) {
   1145 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1146 		goto out;
   1147 	}
   1148 
   1149 	if (cs->vp->v_type != VDIR) {
   1150 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1151 		goto out;
   1152 	}
   1153 
   1154 	/*
   1155 	 * Verify the component name. If failed, error out, but
   1156 	 * do not error out if the component name is a "..".
   1157 	 * SECINFO will return its parents secinfo data for SECINFO "..".
   1158 	 */
   1159 	if (!utf8_dir_verify(utfnm)) {
   1160 		if (utfnm->utf8string_len != 2 ||
   1161 		    utfnm->utf8string_val[0] != '.' ||
   1162 		    utfnm->utf8string_val[1] != '.') {
   1163 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1164 			goto out;
   1165 		}
   1166 	}
   1167 
   1168 	nm = utf8_to_str(utfnm, &len, NULL);
   1169 	if (nm == NULL) {
   1170 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1171 		goto out;
   1172 	}
   1173 
   1174 	if (len > MAXNAMELEN) {
   1175 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1176 		kmem_free(nm, len);
   1177 		goto out;
   1178 	}
   1179 
   1180 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   1181 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   1182 	    MAXPATHLEN  + 1);
   1183 
   1184 	if (name == NULL) {
   1185 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1186 		kmem_free(nm, len);
   1187 		goto out;
   1188 	}
   1189 
   1190 
   1191 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
   1192 
   1193 	if (name != nm)
   1194 		kmem_free(name, MAXPATHLEN + 1);
   1195 	kmem_free(nm, len);
   1196 
   1197 out:
   1198 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
   1199 	    SECINFO4res *, resp);
   1200 }
   1201 
   1202 /*
   1203  * Free SECINFO result.
   1204  */
   1205 /* ARGSUSED */
   1206 static void
   1207 rfs4_op_secinfo_free(nfs_resop4 *resop)
   1208 {
   1209 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
   1210 	int count, i;
   1211 	secinfo4 *resok_val;
   1212 
   1213 	/* If this is not an Ok result, nothing to free. */
   1214 	if (resp->status != NFS4_OK) {
   1215 		return;
   1216 	}
   1217 
   1218 	count = resp->SECINFO4resok_len;
   1219 	resok_val = resp->SECINFO4resok_val;
   1220 
   1221 	for (i = 0; i < count; i++) {
   1222 		if (resok_val[i].flavor == RPCSEC_GSS) {
   1223 			rpcsec_gss_info *info;
   1224 
   1225 			info = &resok_val[i].flavor_info;
   1226 			kmem_free(info->oid.sec_oid4_val,
   1227 			    info->oid.sec_oid4_len);
   1228 		}
   1229 	}
   1230 	kmem_free(resok_val, count * sizeof (secinfo4));
   1231 	resp->SECINFO4resok_len = 0;
   1232 	resp->SECINFO4resok_val = NULL;
   1233 }
   1234 
   1235 /* ARGSUSED */
   1236 static void
   1237 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1238     struct compound_state *cs)
   1239 {
   1240 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
   1241 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
   1242 	int error;
   1243 	vnode_t *vp;
   1244 	struct vattr va;
   1245 	int checkwriteperm;
   1246 	cred_t *cr = cs->cr;
   1247 	bslabel_t *clabel, *slabel;
   1248 	ts_label_t *tslabel;
   1249 	boolean_t admin_low_client;
   1250 
   1251 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
   1252 	    ACCESS4args *, args);
   1253 
   1254 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
   1255 	if (cs->access == CS_ACCESS_DENIED) {
   1256 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1257 		goto out;
   1258 	}
   1259 #endif
   1260 	if (cs->vp == NULL) {
   1261 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1262 		goto out;
   1263 	}
   1264 
   1265 	ASSERT(cr != NULL);
   1266 
   1267 	vp = cs->vp;
   1268 
   1269 	/*
   1270 	 * If the file system is exported read only, it is not appropriate
   1271 	 * to check write permissions for regular files and directories.
   1272 	 * Special files are interpreted by the client, so the underlying
   1273 	 * permissions are sent back to the client for interpretation.
   1274 	 */
   1275 	if (rdonly4(cs->exi, cs->vp, req) &&
   1276 	    (vp->v_type == VREG || vp->v_type == VDIR))
   1277 		checkwriteperm = 0;
   1278 	else
   1279 		checkwriteperm = 1;
   1280 
   1281 	/*
   1282 	 * XXX
   1283 	 * We need the mode so that we can correctly determine access
   1284 	 * permissions relative to a mandatory lock file.  Access to
   1285 	 * mandatory lock files is denied on the server, so it might
   1286 	 * as well be reflected to the server during the open.
   1287 	 */
   1288 	va.va_mask = AT_MODE;
   1289 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
   1290 	if (error) {
   1291 		*cs->statusp = resp->status = puterrno4(error);
   1292 		goto out;
   1293 	}
   1294 	resp->access = 0;
   1295 	resp->supported = 0;
   1296 
   1297 	if (is_system_labeled()) {
   1298 		ASSERT(req->rq_label != NULL);
   1299 		clabel = req->rq_label;
   1300 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
   1301 		    "got client label from request(1)",
   1302 		    struct svc_req *, req);
   1303 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   1304 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
   1305 				*cs->statusp = resp->status = puterrno4(EACCES);
   1306 				goto out;
   1307 			}
   1308 			slabel = label2bslabel(tslabel);
   1309 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
   1310 			    char *, "got server label(1) for vp(2)",
   1311 			    bslabel_t *, slabel, vnode_t *, vp);
   1312 
   1313 			admin_low_client = B_FALSE;
   1314 		} else
   1315 			admin_low_client = B_TRUE;
   1316 	}
   1317 
   1318 	if (args->access & ACCESS4_READ) {
   1319 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
   1320 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1321 		    (!is_system_labeled() || admin_low_client ||
   1322 		    bldominates(clabel, slabel)))
   1323 			resp->access |= ACCESS4_READ;
   1324 		resp->supported |= ACCESS4_READ;
   1325 	}
   1326 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
   1327 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1328 		if (!error && (!is_system_labeled() || admin_low_client ||
   1329 		    bldominates(clabel, slabel)))
   1330 			resp->access |= ACCESS4_LOOKUP;
   1331 		resp->supported |= ACCESS4_LOOKUP;
   1332 	}
   1333 	if (checkwriteperm &&
   1334 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
   1335 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1336 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1337 		    (!is_system_labeled() || admin_low_client ||
   1338 		    blequal(clabel, slabel)))
   1339 			resp->access |=
   1340 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
   1341 		resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
   1342 	}
   1343 
   1344 	if (checkwriteperm &&
   1345 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
   1346 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   1347 		if (!error && (!is_system_labeled() || admin_low_client ||
   1348 		    blequal(clabel, slabel)))
   1349 			resp->access |= ACCESS4_DELETE;
   1350 		resp->supported |= ACCESS4_DELETE;
   1351 	}
   1352 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
   1353 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
   1354 		if (!error && !MANDLOCK(vp, va.va_mode) &&
   1355 		    (!is_system_labeled() || admin_low_client ||
   1356 		    bldominates(clabel, slabel)))
   1357 			resp->access |= ACCESS4_EXECUTE;
   1358 		resp->supported |= ACCESS4_EXECUTE;
   1359 	}
   1360 
   1361 	if (is_system_labeled() && !admin_low_client)
   1362 		label_rele(tslabel);
   1363 
   1364 	*cs->statusp = resp->status = NFS4_OK;
   1365 out:
   1366 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
   1367 	    ACCESS4res *, resp);
   1368 }
   1369 
   1370 /* ARGSUSED */
   1371 static void
   1372 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1373     struct compound_state *cs)
   1374 {
   1375 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
   1376 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
   1377 	int error;
   1378 	vnode_t *vp = cs->vp;
   1379 	cred_t *cr = cs->cr;
   1380 	vattr_t va;
   1381 
   1382 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
   1383 	    COMMIT4args *, args);
   1384 
   1385 	if (vp == NULL) {
   1386 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1387 		goto out;
   1388 	}
   1389 	if (cs->access == CS_ACCESS_DENIED) {
   1390 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1391 		goto out;
   1392 	}
   1393 
   1394 	if (args->offset + args->count < args->offset) {
   1395 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1396 		goto out;
   1397 	}
   1398 
   1399 	va.va_mask = AT_UID;
   1400 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
   1401 
   1402 	/*
   1403 	 * If we can't get the attributes, then we can't do the
   1404 	 * right access checking.  So, we'll fail the request.
   1405 	 */
   1406 	if (error) {
   1407 		*cs->statusp = resp->status = puterrno4(error);
   1408 		goto out;
   1409 	}
   1410 	if (rdonly4(cs->exi, cs->vp, req)) {
   1411 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1412 		goto out;
   1413 	}
   1414 
   1415 	if (vp->v_type != VREG) {
   1416 		if (vp->v_type == VDIR)
   1417 			resp->status = NFS4ERR_ISDIR;
   1418 		else
   1419 			resp->status = NFS4ERR_INVAL;
   1420 		*cs->statusp = resp->status;
   1421 		goto out;
   1422 	}
   1423 
   1424 	if (crgetuid(cr) != va.va_uid &&
   1425 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
   1426 		*cs->statusp = resp->status = puterrno4(error);
   1427 		goto out;
   1428 	}
   1429 
   1430 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
   1431 	if (!error)
   1432 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
   1433 
   1434 	if (error) {
   1435 		*cs->statusp = resp->status = puterrno4(error);
   1436 		goto out;
   1437 	}
   1438 
   1439 	*cs->statusp = resp->status = NFS4_OK;
   1440 	resp->writeverf = Write4verf;
   1441 out:
   1442 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
   1443 	    COMMIT4res *, resp);
   1444 }
   1445 
   1446 /*
   1447  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
   1448  * was completed. It does the nfsv4 create for special files.
   1449  */
   1450 /* ARGSUSED */
   1451 static vnode_t *
   1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
   1453     struct compound_state *cs, vattr_t *vap, char *nm)
   1454 {
   1455 	int error;
   1456 	cred_t *cr = cs->cr;
   1457 	vnode_t *dvp = cs->vp;
   1458 	vnode_t *vp = NULL;
   1459 	int mode;
   1460 	enum vcexcl excl;
   1461 
   1462 	switch (args->type) {
   1463 	case NF4CHR:
   1464 	case NF4BLK:
   1465 		if (secpolicy_sys_devices(cr) != 0) {
   1466 			*cs->statusp = resp->status = NFS4ERR_PERM;
   1467 			return (NULL);
   1468 		}
   1469 		if (args->type == NF4CHR)
   1470 			vap->va_type = VCHR;
   1471 		else
   1472 			vap->va_type = VBLK;
   1473 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
   1474 		    args->ftype4_u.devdata.specdata2);
   1475 		vap->va_mask |= AT_RDEV;
   1476 		break;
   1477 	case NF4SOCK:
   1478 		vap->va_type = VSOCK;
   1479 		break;
   1480 	case NF4FIFO:
   1481 		vap->va_type = VFIFO;
   1482 		break;
   1483 	default:
   1484 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1485 		return (NULL);
   1486 	}
   1487 
   1488 	/*
   1489 	 * Must specify the mode.
   1490 	 */
   1491 	if (!(vap->va_mask & AT_MODE)) {
   1492 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1493 		return (NULL);
   1494 	}
   1495 
   1496 	excl = EXCL;
   1497 
   1498 	mode = 0;
   1499 
   1500 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
   1501 	if (error) {
   1502 		*cs->statusp = resp->status = puterrno4(error);
   1503 		return (NULL);
   1504 	}
   1505 	return (vp);
   1506 }
   1507 
   1508 /*
   1509  * nfsv4 create is used to create non-regular files. For regular files,
   1510  * use nfsv4 open.
   1511  */
   1512 /* ARGSUSED */
   1513 static void
   1514 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1515     struct compound_state *cs)
   1516 {
   1517 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
   1518 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
   1519 	int error;
   1520 	struct vattr bva, iva, iva2, ava, *vap;
   1521 	cred_t *cr = cs->cr;
   1522 	vnode_t *dvp = cs->vp;
   1523 	vnode_t *vp = NULL;
   1524 	vnode_t *realvp;
   1525 	char *nm, *lnm;
   1526 	uint_t len, llen;
   1527 	int syncval = 0;
   1528 	struct nfs4_svgetit_arg sarg;
   1529 	struct nfs4_ntov_table ntov;
   1530 	struct statvfs64 sb;
   1531 	nfsstat4 status;
   1532 	struct sockaddr *ca;
   1533 	char *name = NULL;
   1534 	char *lname = NULL;
   1535 
   1536 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
   1537 	    CREATE4args *, args);
   1538 
   1539 	resp->attrset = 0;
   1540 
   1541 	if (dvp == NULL) {
   1542 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   1543 		goto out;
   1544 	}
   1545 
   1546 	/*
   1547 	 * If there is an unshared filesystem mounted on this vnode,
   1548 	 * do not allow to create an object in this directory.
   1549 	 */
   1550 	if (vn_ismntpt(dvp)) {
   1551 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1552 		goto out;
   1553 	}
   1554 
   1555 	/* Verify that type is correct */
   1556 	switch (args->type) {
   1557 	case NF4LNK:
   1558 	case NF4BLK:
   1559 	case NF4CHR:
   1560 	case NF4SOCK:
   1561 	case NF4FIFO:
   1562 	case NF4DIR:
   1563 		break;
   1564 	default:
   1565 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
   1566 		goto out;
   1567 	};
   1568 
   1569 	if (cs->access == CS_ACCESS_DENIED) {
   1570 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   1571 		goto out;
   1572 	}
   1573 	if (dvp->v_type != VDIR) {
   1574 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   1575 		goto out;
   1576 	}
   1577 	if (!utf8_dir_verify(&args->objname)) {
   1578 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1579 		goto out;
   1580 	}
   1581 
   1582 	if (rdonly4(cs->exi, cs->vp, req)) {
   1583 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   1584 		goto out;
   1585 	}
   1586 
   1587 	/*
   1588 	 * Name of newly created object
   1589 	 */
   1590 	nm = utf8_to_fn(&args->objname, &len, NULL);
   1591 	if (nm == NULL) {
   1592 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1593 		goto out;
   1594 	}
   1595 
   1596 	if (len > MAXNAMELEN) {
   1597 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1598 		kmem_free(nm, len);
   1599 		goto out;
   1600 	}
   1601 
   1602 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   1603 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   1604 	    MAXPATHLEN  + 1);
   1605 
   1606 	if (name == NULL) {
   1607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   1608 		kmem_free(nm, len);
   1609 		goto out;
   1610 	}
   1611 
   1612 	resp->attrset = 0;
   1613 
   1614 	sarg.sbp = &sb;
   1615 	sarg.is_referral = B_FALSE;
   1616 	nfs4_ntov_table_init(&ntov);
   1617 
   1618 	status = do_rfs4_set_attrs(&resp->attrset,
   1619 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
   1620 
   1621 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
   1622 		status = NFS4ERR_INVAL;
   1623 
   1624 	if (status != NFS4_OK) {
   1625 		*cs->statusp = resp->status = status;
   1626 		kmem_free(nm, len);
   1627 		nfs4_ntov_table_free(&ntov, &sarg);
   1628 		resp->attrset = 0;
   1629 		goto out;
   1630 	}
   1631 
   1632 	/* Get "before" change value */
   1633 	bva.va_mask = AT_CTIME|AT_SEQ;
   1634 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
   1635 	if (error) {
   1636 		*cs->statusp = resp->status = puterrno4(error);
   1637 		kmem_free(nm, len);
   1638 		nfs4_ntov_table_free(&ntov, &sarg);
   1639 		resp->attrset = 0;
   1640 		goto out;
   1641 	}
   1642 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
   1643 
   1644 	vap = sarg.vap;
   1645 
   1646 	/*
   1647 	 * Set default initial values for attributes when not specified
   1648 	 * in createattrs.
   1649 	 */
   1650 	if ((vap->va_mask & AT_UID) == 0) {
   1651 		vap->va_uid = crgetuid(cr);
   1652 		vap->va_mask |= AT_UID;
   1653 	}
   1654 	if ((vap->va_mask & AT_GID) == 0) {
   1655 		vap->va_gid = crgetgid(cr);
   1656 		vap->va_mask |= AT_GID;
   1657 	}
   1658 
   1659 	vap->va_mask |= AT_TYPE;
   1660 	switch (args->type) {
   1661 	case NF4DIR:
   1662 		vap->va_type = VDIR;
   1663 		if ((vap->va_mask & AT_MODE) == 0) {
   1664 			vap->va_mode = 0700;	/* default: owner rwx only */
   1665 			vap->va_mask |= AT_MODE;
   1666 		}
   1667 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr, NULL, 0, NULL);
   1668 		if (error)
   1669 			break;
   1670 
   1671 		/*
   1672 		 * Get the initial "after" sequence number, if it fails,
   1673 		 * set to zero
   1674 		 */
   1675 		iva.va_mask = AT_SEQ;
   1676 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1677 			iva.va_seq = 0;
   1678 		break;
   1679 	case NF4LNK:
   1680 		vap->va_type = VLNK;
   1681 		if ((vap->va_mask & AT_MODE) == 0) {
   1682 			vap->va_mode = 0700;	/* default: owner rwx only */
   1683 			vap->va_mask |= AT_MODE;
   1684 		}
   1685 
   1686 		/*
   1687 		 * symlink names must be treated as data
   1688 		 */
   1689 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
   1690 
   1691 		if (lnm == NULL) {
   1692 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   1693 			if (name != nm)
   1694 				kmem_free(name, MAXPATHLEN + 1);
   1695 			kmem_free(nm, len);
   1696 			nfs4_ntov_table_free(&ntov, &sarg);
   1697 			resp->attrset = 0;
   1698 			goto out;
   1699 		}
   1700 
   1701 		if (llen > MAXPATHLEN) {
   1702 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   1703 			if (name != nm)
   1704 				kmem_free(name, MAXPATHLEN + 1);
   1705 			kmem_free(nm, len);
   1706 			kmem_free(lnm, llen);
   1707 			nfs4_ntov_table_free(&ntov, &sarg);
   1708 			resp->attrset = 0;
   1709 			goto out;
   1710 		}
   1711 
   1712 		lname = nfscmd_convname(ca, cs->exi, lnm,
   1713 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
   1714 
   1715 		if (lname == NULL) {
   1716 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   1717 			if (name != nm)
   1718 				kmem_free(name, MAXPATHLEN + 1);
   1719 			kmem_free(nm, len);
   1720 			kmem_free(lnm, llen);
   1721 			nfs4_ntov_table_free(&ntov, &sarg);
   1722 			resp->attrset = 0;
   1723 			goto out;
   1724 		}
   1725 
   1726 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, NULL, 0);
   1727 		if (lname != lnm)
   1728 			kmem_free(lname, MAXPATHLEN + 1);
   1729 		if (lnm != NULL)
   1730 			kmem_free(lnm, llen);
   1731 		if (error)
   1732 			break;
   1733 
   1734 		/*
   1735 		 * Get the initial "after" sequence number, if it fails,
   1736 		 * set to zero
   1737 		 */
   1738 		iva.va_mask = AT_SEQ;
   1739 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1740 			iva.va_seq = 0;
   1741 
   1742 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr,
   1743 		    NULL, NULL, NULL);
   1744 		if (error)
   1745 			break;
   1746 
   1747 		/*
   1748 		 * va_seq is not safe over VOP calls, check it again
   1749 		 * if it has changed zero out iva to force atomic = FALSE.
   1750 		 */
   1751 		iva2.va_mask = AT_SEQ;
   1752 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
   1753 		    iva2.va_seq != iva.va_seq)
   1754 			iva.va_seq = 0;
   1755 		break;
   1756 	default:
   1757 		/*
   1758 		 * probably a special file.
   1759 		 */
   1760 		if ((vap->va_mask & AT_MODE) == 0) {
   1761 			vap->va_mode = 0600;	/* default: owner rw only */
   1762 			vap->va_mask |= AT_MODE;
   1763 		}
   1764 		syncval = FNODSYNC;
   1765 		/*
   1766 		 * We know this will only generate one VOP call
   1767 		 */
   1768 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
   1769 
   1770 		if (vp == NULL) {
   1771 			if (name != nm)
   1772 				kmem_free(name, MAXPATHLEN + 1);
   1773 			kmem_free(nm, len);
   1774 			nfs4_ntov_table_free(&ntov, &sarg);
   1775 			resp->attrset = 0;
   1776 			goto out;
   1777 		}
   1778 
   1779 		/*
   1780 		 * Get the initial "after" sequence number, if it fails,
   1781 		 * set to zero
   1782 		 */
   1783 		iva.va_mask = AT_SEQ;
   1784 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
   1785 			iva.va_seq = 0;
   1786 
   1787 		break;
   1788 	}
   1789 	if (name != nm)
   1790 		kmem_free(name, MAXPATHLEN + 1);
   1791 	kmem_free(nm, len);
   1792 
   1793 	if (error) {
   1794 		*cs->statusp = resp->status = puterrno4(error);
   1795 	}
   1796 
   1797 	/*
   1798 	 * Force modified data and metadata out to stable storage.
   1799 	 */
   1800 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
   1801 
   1802 	if (resp->status != NFS4_OK) {
   1803 		if (vp != NULL)
   1804 			VN_RELE(vp);
   1805 		nfs4_ntov_table_free(&ntov, &sarg);
   1806 		resp->attrset = 0;
   1807 		goto out;
   1808 	}
   1809 
   1810 	/*
   1811 	 * Finish setup of cinfo response, "before" value already set.
   1812 	 * Get "after" change value, if it fails, simply return the
   1813 	 * before value.
   1814 	 */
   1815 	ava.va_mask = AT_CTIME|AT_SEQ;
   1816 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
   1817 		ava.va_ctime = bva.va_ctime;
   1818 		ava.va_seq = 0;
   1819 	}
   1820 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
   1821 
   1822 	/*
   1823 	 * True verification that object was created with correct
   1824 	 * attrs is impossible.  The attrs could have been changed
   1825 	 * immediately after object creation.  If attributes did
   1826 	 * not verify, the only recourse for the server is to
   1827 	 * destroy the object.  Maybe if some attrs (like gid)
   1828 	 * are set incorrectly, the object should be destroyed;
   1829 	 * however, seems bad as a default policy.  Do we really
   1830 	 * want to destroy an object over one of the times not
   1831 	 * verifying correctly?  For these reasons, the server
   1832 	 * currently sets bits in attrset for createattrs
   1833 	 * that were set; however, no verification is done.
   1834 	 *
   1835 	 * vmask_to_nmask accounts for vattr bits set on create
   1836 	 *	[do_rfs4_set_attrs() only sets resp bits for
   1837 	 *	 non-vattr/vfs bits.]
   1838 	 * Mask off any bits set by default so as not to return
   1839 	 * more attrset bits than were requested in createattrs
   1840 	 */
   1841 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
   1842 	resp->attrset &= args->createattrs.attrmask;
   1843 	nfs4_ntov_table_free(&ntov, &sarg);
   1844 
   1845 	error = makefh4(&cs->fh, vp, cs->exi);
   1846 	if (error) {
   1847 		*cs->statusp = resp->status = puterrno4(error);
   1848 	}
   1849 
   1850 	/*
   1851 	 * The cinfo.atomic = TRUE only if we got no errors, we have
   1852 	 * non-zero va_seq's, and it has incremented by exactly one
   1853 	 * during the creation and it didn't change during the VOP_LOOKUP
   1854 	 * or VOP_FSYNC.
   1855 	 */
   1856 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
   1857 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
   1858 		resp->cinfo.atomic = TRUE;
   1859 	else
   1860 		resp->cinfo.atomic = FALSE;
   1861 
   1862 	/*
   1863 	 * Force modified metadata out to stable storage.
   1864 	 *
   1865 	 * if a underlying vp exists, pass it to VOP_FSYNC
   1866 	 */
   1867 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
   1868 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
   1869 	else
   1870 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
   1871 
   1872 	if (resp->status != NFS4_OK) {
   1873 		VN_RELE(vp);
   1874 		goto out;
   1875 	}
   1876 	if (cs->vp)
   1877 		VN_RELE(cs->vp);
   1878 
   1879 	cs->vp = vp;
   1880 	*cs->statusp = resp->status = NFS4_OK;
   1881 out:
   1882 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
   1883 	    CREATE4res *, resp);
   1884 }
   1885 
   1886 /*ARGSUSED*/
   1887 static void
   1888 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1889     struct compound_state *cs)
   1890 {
   1891 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
   1892 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
   1893 
   1894 	rfs4_op_inval(argop, resop, req, cs);
   1895 
   1896 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
   1897 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
   1898 }
   1899 
   1900 /*ARGSUSED*/
   1901 static void
   1902 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   1903     struct compound_state *cs)
   1904 {
   1905 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
   1906 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
   1907 	rfs4_deleg_state_t *dsp;
   1908 	nfsstat4 status;
   1909 
   1910 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
   1911 	    DELEGRETURN4args *, args);
   1912 
   1913 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
   1914 	resp->status = *cs->statusp = status;
   1915 	if (status != NFS4_OK)
   1916 		goto out;
   1917 
   1918 	/* Ensure specified filehandle matches */
   1919 	if (cs->vp != dsp->rds_finfo->rf_vp) {
   1920 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
   1921 	} else
   1922 		rfs4_return_deleg(dsp, FALSE);
   1923 
   1924 	rfs4_update_lease(dsp->rds_client);
   1925 
   1926 	rfs4_deleg_state_rele(dsp);
   1927 out:
   1928 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
   1929 	    DELEGRETURN4res *, resp);
   1930 }
   1931 
   1932 /*
   1933  * Check to see if a given "flavor" is an explicitly shared flavor.
   1934  * The assumption of this routine is the "flavor" is already a valid
   1935  * flavor in the secinfo list of "exi".
   1936  *
   1937  *	e.g.
   1938  *		# share -o sec=flavor1 /export
   1939  *		# share -o sec=flavor2 /export/home
   1940  *
   1941  *		flavor2 is not an explicitly shared flavor for /export,
   1942  *		however it is in the secinfo list for /export thru the
   1943  *		server namespace setup.
   1944  */
   1945 int
   1946 is_exported_sec(int flavor, struct exportinfo *exi)
   1947 {
   1948 	int	i;
   1949 	struct secinfo *sp;
   1950 
   1951 	sp = exi->exi_export.ex_secinfo;
   1952 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
   1953 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
   1954 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
   1955 			return (SEC_REF_EXPORTED(&sp[i]));
   1956 		}
   1957 	}
   1958 
   1959 	/* Should not reach this point based on the assumption */
   1960 	return (0);
   1961 }
   1962 
   1963 /*
   1964  * Check if the security flavor used in the request matches what is
   1965  * required at the export point or at the root pseudo node (exi_root).
   1966  *
   1967  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
   1968  *
   1969  */
   1970 static int
   1971 secinfo_match_or_authnone(struct compound_state *cs)
   1972 {
   1973 	int	i;
   1974 	struct secinfo *sp;
   1975 
   1976 	/*
   1977 	 * Check cs->nfsflavor (from the request) against
   1978 	 * the current export data in cs->exi.
   1979 	 */
   1980 	sp = cs->exi->exi_export.ex_secinfo;
   1981 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
   1982 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
   1983 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
   1984 			return (1);
   1985 	}
   1986 
   1987 	return (0);
   1988 }
   1989 
   1990 /*
   1991  * Check the access authority for the client and return the correct error.
   1992  */
   1993 nfsstat4
   1994 call_checkauth4(struct compound_state *cs, struct svc_req *req)
   1995 {
   1996 	int	authres;
   1997 
   1998 	/*
   1999 	 * First, check if the security flavor used in the request
   2000 	 * are among the flavors set in the server namespace.
   2001 	 */
   2002 	if (!secinfo_match_or_authnone(cs)) {
   2003 		*cs->statusp = NFS4ERR_WRONGSEC;
   2004 		return (*cs->statusp);
   2005 	}
   2006 
   2007 	authres = checkauth4(cs, req);
   2008 
   2009 	if (authres > 0) {
   2010 		*cs->statusp = NFS4_OK;
   2011 		if (! (cs->access & CS_ACCESS_LIMITED))
   2012 			cs->access = CS_ACCESS_OK;
   2013 	} else if (authres == 0) {
   2014 		*cs->statusp = NFS4ERR_ACCESS;
   2015 	} else if (authres == -2) {
   2016 		*cs->statusp = NFS4ERR_WRONGSEC;
   2017 	} else {
   2018 		*cs->statusp = NFS4ERR_DELAY;
   2019 	}
   2020 	return (*cs->statusp);
   2021 }
   2022 
   2023 /*
   2024  * bitmap4_to_attrmask is called by getattr and readdir.
   2025  * It sets up the vattr mask and determines whether vfsstat call is needed
   2026  * based on the input bitmap.
   2027  * Returns nfsv4 status.
   2028  */
   2029 static nfsstat4
   2030 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
   2031 {
   2032 	int i;
   2033 	uint_t	va_mask;
   2034 	struct statvfs64 *sbp = sargp->sbp;
   2035 
   2036 	sargp->sbp = NULL;
   2037 	sargp->flag = 0;
   2038 	sargp->rdattr_error = NFS4_OK;
   2039 	sargp->mntdfid_set = FALSE;
   2040 	if (sargp->cs->vp)
   2041 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
   2042 		    FH4_ATTRDIR | FH4_NAMEDATTR);
   2043 	else
   2044 		sargp->xattr = 0;
   2045 
   2046 	/*
   2047 	 * Set rdattr_error_req to true if return error per
   2048 	 * failed entry rather than fail the readdir.
   2049 	 */
   2050 	if (breq & FATTR4_RDATTR_ERROR_MASK)
   2051 		sargp->rdattr_error_req = 1;
   2052 	else
   2053 		sargp->rdattr_error_req = 0;
   2054 
   2055 	/*
   2056 	 * generate the va_mask
   2057 	 * Handle the easy cases first
   2058 	 */
   2059 	switch (breq) {
   2060 	case NFS4_NTOV_ATTR_MASK:
   2061 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
   2062 		return (NFS4_OK);
   2063 
   2064 	case NFS4_FS_ATTR_MASK:
   2065 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
   2066 		sargp->sbp = sbp;
   2067 		return (NFS4_OK);
   2068 
   2069 	case NFS4_NTOV_ATTR_CACHE_MASK:
   2070 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
   2071 		return (NFS4_OK);
   2072 
   2073 	case FATTR4_LEASE_TIME_MASK:
   2074 		sargp->vap->va_mask = 0;
   2075 		return (NFS4_OK);
   2076 
   2077 	default:
   2078 		va_mask = 0;
   2079 		for (i = 0; i < nfs4_ntov_map_size; i++) {
   2080 			if ((breq & nfs4_ntov_map[i].fbit) &&
   2081 			    nfs4_ntov_map[i].vbit)
   2082 				va_mask |= nfs4_ntov_map[i].vbit;
   2083 		}
   2084 
   2085 		/*
   2086 		 * Check is vfsstat is needed
   2087 		 */
   2088 		if (breq & NFS4_FS_ATTR_MASK)
   2089 			sargp->sbp = sbp;
   2090 
   2091 		sargp->vap->va_mask = va_mask;
   2092 		return (NFS4_OK);
   2093 	}
   2094 	/* NOTREACHED */
   2095 }
   2096 
   2097 /*
   2098  * bitmap4_get_sysattrs is called by getattr and readdir.
   2099  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
   2100  * Returns nfsv4 status.
   2101  */
   2102 static nfsstat4
   2103 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
   2104 {
   2105 	int error;
   2106 	struct compound_state *cs = sargp->cs;
   2107 	vnode_t *vp = cs->vp;
   2108 
   2109 	if (sargp->sbp != NULL) {
   2110 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
   2111 			sargp->sbp = NULL;	/* to identify error */
   2112 			return (puterrno4(error));
   2113 		}
   2114 	}
   2115 
   2116 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
   2117 }
   2118 
   2119 static void
   2120 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
   2121 {
   2122 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
   2123 	    KM_SLEEP);
   2124 	ntovp->attrcnt = 0;
   2125 	ntovp->vfsstat = FALSE;
   2126 }
   2127 
   2128 static void
   2129 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
   2130     struct nfs4_svgetit_arg *sargp)
   2131 {
   2132 	int i;
   2133 	union nfs4_attr_u *na;
   2134 	uint8_t *amap;
   2135 
   2136 	/*
   2137 	 * XXX Should do the same checks for whether the bit is set
   2138 	 */
   2139 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
   2140 	    i < ntovp->attrcnt; i++, na++, amap++) {
   2141 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
   2142 		    NFS4ATTR_FREEIT, sargp, na);
   2143 	}
   2144 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
   2145 		/*
   2146 		 * xdr_free for getattr will be done later
   2147 		 */
   2148 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
   2149 		    i < ntovp->attrcnt; i++, na++, amap++) {
   2150 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
   2151 		}
   2152 	}
   2153 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
   2154 }
   2155 
   2156 /*
   2157  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
   2158  */
   2159 static nfsstat4
   2160 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
   2161     struct nfs4_svgetit_arg *sargp)
   2162 {
   2163 	int error = 0;
   2164 	int i, k;
   2165 	struct nfs4_ntov_table ntov;
   2166 	XDR xdr;
   2167 	ulong_t xdr_size;
   2168 	char *xdr_attrs;
   2169 	nfsstat4 status = NFS4_OK;
   2170 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
   2171 	union nfs4_attr_u *na;
   2172 	uint8_t *amap;
   2173 
   2174 	sargp->op = NFS4ATTR_GETIT;
   2175 	sargp->flag = 0;
   2176 
   2177 	fattrp->attrmask = 0;
   2178 	/* if no bits requested, then return empty fattr4 */
   2179 	if (breq == 0) {
   2180 		fattrp->attrlist4_len = 0;
   2181 		fattrp->attrlist4 = NULL;
   2182 		return (NFS4_OK);
   2183 	}
   2184 
   2185 	/*
   2186 	 * return NFS4ERR_INVAL when client requests write-only attrs
   2187 	 */
   2188 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
   2189 		return (NFS4ERR_INVAL);
   2190 
   2191 	nfs4_ntov_table_init(&ntov);
   2192 	na = ntov.na;
   2193 	amap = ntov.amap;
   2194 
   2195 	/*
   2196 	 * Now loop to get or verify the attrs
   2197 	 */
   2198 	for (i = 0; i < nfs4_ntov_map_size; i++) {
   2199 		if (breq & nfs4_ntov_map[i].fbit) {
   2200 			if ((*nfs4_ntov_map[i].sv_getit)(
   2201 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
   2202 
   2203 				error = (*nfs4_ntov_map[i].sv_getit)(
   2204 				    NFS4ATTR_GETIT, sargp, na);
   2205 
   2206 				/*
   2207 				 * Possible error values:
   2208 				 * >0 if sv_getit failed to
   2209 				 * get the attr; 0 if succeeded;
   2210 				 * <0 if rdattr_error and the
   2211 				 * attribute cannot be returned.
   2212 				 */
   2213 				if (error && !(sargp->rdattr_error_req))
   2214 					goto done;
   2215 				/*
   2216 				 * If error then just for entry
   2217 				 */
   2218 				if (error == 0) {
   2219 					fattrp->attrmask |=
   2220 					    nfs4_ntov_map[i].fbit;
   2221 					*amap++ =
   2222 					    (uint8_t)nfs4_ntov_map[i].nval;
   2223 					na++;
   2224 					(ntov.attrcnt)++;
   2225 				} else if ((error > 0) &&
   2226 				    (sargp->rdattr_error == NFS4_OK)) {
   2227 					sargp->rdattr_error = puterrno4(error);
   2228 				}
   2229 				error = 0;
   2230 			}
   2231 		}
   2232 	}
   2233 
   2234 	/*
   2235 	 * If rdattr_error was set after the return value for it was assigned,
   2236 	 * update it.
   2237 	 */
   2238 	if (prev_rdattr_error != sargp->rdattr_error) {
   2239 		na = ntov.na;
   2240 		amap = ntov.amap;
   2241 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2242 			k = *amap;
   2243 			if (k < FATTR4_RDATTR_ERROR) {
   2244 				continue;
   2245 			}
   2246 			if ((k == FATTR4_RDATTR_ERROR) &&
   2247 			    ((*nfs4_ntov_map[k].sv_getit)(
   2248 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
   2249 
   2250 				(void) (*nfs4_ntov_map[k].sv_getit)(
   2251 				    NFS4ATTR_GETIT, sargp, na);
   2252 			}
   2253 			break;
   2254 		}
   2255 	}
   2256 
   2257 	xdr_size = 0;
   2258 	na = ntov.na;
   2259 	amap = ntov.amap;
   2260 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2261 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
   2262 	}
   2263 
   2264 	fattrp->attrlist4_len = xdr_size;
   2265 	if (xdr_size) {
   2266 		/* freed by rfs4_op_getattr_free() */
   2267 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
   2268 
   2269 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
   2270 
   2271 		na = ntov.na;
   2272 		amap = ntov.amap;
   2273 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
   2274 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
   2275 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
   2276 				    int, *amap);
   2277 				status = NFS4ERR_SERVERFAULT;
   2278 				break;
   2279 			}
   2280 		}
   2281 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
   2282 	} else {
   2283 		fattrp->attrlist4 = NULL;
   2284 	}
   2285 done:
   2286 
   2287 	nfs4_ntov_table_free(&ntov, sargp);
   2288 
   2289 	if (error != 0)
   2290 		status = puterrno4(error);
   2291 
   2292 	return (status);
   2293 }
   2294 
   2295 /* ARGSUSED */
   2296 static void
   2297 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2298     struct compound_state *cs)
   2299 {
   2300 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
   2301 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   2302 	struct nfs4_svgetit_arg sarg;
   2303 	struct statvfs64 sb;
   2304 	nfsstat4 status;
   2305 
   2306 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
   2307 	    GETATTR4args *, args);
   2308 
   2309 	if (cs->vp == NULL) {
   2310 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2311 		goto out;
   2312 	}
   2313 
   2314 	if (cs->access == CS_ACCESS_DENIED) {
   2315 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2316 		goto out;
   2317 	}
   2318 
   2319 	sarg.sbp = &sb;
   2320 	sarg.cs = cs;
   2321 	sarg.is_referral = B_FALSE;
   2322 
   2323 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
   2324 	if (status == NFS4_OK) {
   2325 
   2326 		status = bitmap4_get_sysattrs(&sarg);
   2327 		if (status == NFS4_OK) {
   2328 
   2329 			/* Is this a referral? */
   2330 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
   2331 				/* Older V4 Solaris client sees a link */
   2332 				if (client_is_downrev(req))
   2333 					sarg.vap->va_type = VLNK;
   2334 				else
   2335 					sarg.is_referral = B_TRUE;
   2336 			}
   2337 
   2338 			status = do_rfs4_op_getattr(args->attr_request,
   2339 			    &resp->obj_attributes, &sarg);
   2340 		}
   2341 	}
   2342 	*cs->statusp = resp->status = status;
   2343 out:
   2344 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
   2345 	    GETATTR4res *, resp);
   2346 }
   2347 
   2348 static void
   2349 rfs4_op_getattr_free(nfs_resop4 *resop)
   2350 {
   2351 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
   2352 
   2353 	nfs4_fattr4_free(&resp->obj_attributes);
   2354 }
   2355 
   2356 /* ARGSUSED */
   2357 static void
   2358 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2359     struct compound_state *cs)
   2360 {
   2361 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   2362 
   2363 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
   2364 
   2365 	if (cs->vp == NULL) {
   2366 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2367 		goto out;
   2368 	}
   2369 	if (cs->access == CS_ACCESS_DENIED) {
   2370 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2371 		goto out;
   2372 	}
   2373 
   2374 	/* check for reparse point at the share point */
   2375 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
   2376 		/* it's all bad */
   2377 		cs->exi->exi_moved = 1;
   2378 		*cs->statusp = resp->status = NFS4ERR_MOVED;
   2379 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
   2380 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
   2381 		return;
   2382 	}
   2383 
   2384 	/* check for reparse point at vp */
   2385 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
   2386 		/* it's not all bad */
   2387 		*cs->statusp = resp->status = NFS4ERR_MOVED;
   2388 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
   2389 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
   2390 		return;
   2391 	}
   2392 
   2393 	resp->object.nfs_fh4_val =
   2394 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
   2395 	nfs_fh4_copy(&cs->fh, &resp->object);
   2396 	*cs->statusp = resp->status = NFS4_OK;
   2397 out:
   2398 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
   2399 	    GETFH4res *, resp);
   2400 }
   2401 
   2402 static void
   2403 rfs4_op_getfh_free(nfs_resop4 *resop)
   2404 {
   2405 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
   2406 
   2407 	if (resp->status == NFS4_OK &&
   2408 	    resp->object.nfs_fh4_val != NULL) {
   2409 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
   2410 		resp->object.nfs_fh4_val = NULL;
   2411 		resp->object.nfs_fh4_len = 0;
   2412 	}
   2413 }
   2414 
   2415 /*
   2416  * illegal: args: void
   2417  *	    res : status (NFS4ERR_OP_ILLEGAL)
   2418  */
   2419 /* ARGSUSED */
   2420 static void
   2421 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
   2422     struct svc_req *req, struct compound_state *cs)
   2423 {
   2424 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
   2425 
   2426 	resop->resop = OP_ILLEGAL;
   2427 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
   2428 }
   2429 
   2430 /*
   2431  * link: args: SAVED_FH: file, CURRENT_FH: target directory
   2432  *	 res: status. If success - CURRENT_FH unchanged, return change_info
   2433  */
   2434 /* ARGSUSED */
   2435 static void
   2436 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2437     struct compound_state *cs)
   2438 {
   2439 	LINK4args *args = &argop->nfs_argop4_u.oplink;
   2440 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
   2441 	int error;
   2442 	vnode_t *vp;
   2443 	vnode_t *dvp;
   2444 	struct vattr bdva, idva, adva;
   2445 	char *nm;
   2446 	uint_t  len;
   2447 	struct sockaddr *ca;
   2448 	char *name = NULL;
   2449 
   2450 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
   2451 	    LINK4args *, args);
   2452 
   2453 	/* SAVED_FH: source object */
   2454 	vp = cs->saved_vp;
   2455 	if (vp == NULL) {
   2456 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2457 		goto out;
   2458 	}
   2459 
   2460 	/* CURRENT_FH: target directory */
   2461 	dvp = cs->vp;
   2462 	if (dvp == NULL) {
   2463 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2464 		goto out;
   2465 	}
   2466 
   2467 	/*
   2468 	 * If there is a non-shared filesystem mounted on this vnode,
   2469 	 * do not allow to link any file in this directory.
   2470 	 */
   2471 	if (vn_ismntpt(dvp)) {
   2472 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2473 		goto out;
   2474 	}
   2475 
   2476 	if (cs->access == CS_ACCESS_DENIED) {
   2477 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   2478 		goto out;
   2479 	}
   2480 
   2481 	/* Check source object's type validity */
   2482 	if (vp->v_type == VDIR) {
   2483 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
   2484 		goto out;
   2485 	}
   2486 
   2487 	/* Check target directory's type */
   2488 	if (dvp->v_type != VDIR) {
   2489 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2490 		goto out;
   2491 	}
   2492 
   2493 	if (cs->saved_exi != cs->exi) {
   2494 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   2495 		goto out;
   2496 	}
   2497 
   2498 	if (!utf8_dir_verify(&args->newname)) {
   2499 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2500 		goto out;
   2501 	}
   2502 
   2503 	nm = utf8_to_fn(&args->newname, &len, NULL);
   2504 	if (nm == NULL) {
   2505 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2506 		goto out;
   2507 	}
   2508 
   2509 	if (len > MAXNAMELEN) {
   2510 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2511 		kmem_free(nm, len);
   2512 		goto out;
   2513 	}
   2514 
   2515 	if (rdonly4(cs->exi, cs->vp, req)) {
   2516 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   2517 		kmem_free(nm, len);
   2518 		goto out;
   2519 	}
   2520 
   2521 	/* Get "before" change value */
   2522 	bdva.va_mask = AT_CTIME|AT_SEQ;
   2523 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
   2524 	if (error) {
   2525 		*cs->statusp = resp->status = puterrno4(error);
   2526 		kmem_free(nm, len);
   2527 		goto out;
   2528 	}
   2529 
   2530 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   2531 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   2532 	    MAXPATHLEN  + 1);
   2533 
   2534 	if (name == NULL) {
   2535 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2536 		kmem_free(nm, len);
   2537 		goto out;
   2538 	}
   2539 
   2540 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   2541 
   2542 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
   2543 
   2544 	if (nm != name)
   2545 		kmem_free(name, MAXPATHLEN + 1);
   2546 	kmem_free(nm, len);
   2547 
   2548 	/*
   2549 	 * Get the initial "after" sequence number, if it fails, set to zero
   2550 	 */
   2551 	idva.va_mask = AT_SEQ;
   2552 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
   2553 		idva.va_seq = 0;
   2554 
   2555 	/*
   2556 	 * Force modified data and metadata out to stable storage.
   2557 	 */
   2558 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
   2559 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   2560 
   2561 	if (error) {
   2562 		*cs->statusp = resp->status = puterrno4(error);
   2563 		goto out;
   2564 	}
   2565 
   2566 	/*
   2567 	 * Get "after" change value, if it fails, simply return the
   2568 	 * before value.
   2569 	 */
   2570 	adva.va_mask = AT_CTIME|AT_SEQ;
   2571 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
   2572 		adva.va_ctime = bdva.va_ctime;
   2573 		adva.va_seq = 0;
   2574 	}
   2575 
   2576 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   2577 
   2578 	/*
   2579 	 * The cinfo.atomic = TRUE only if we have
   2580 	 * non-zero va_seq's, and it has incremented by exactly one
   2581 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
   2582 	 */
   2583 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   2584 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
   2585 		resp->cinfo.atomic = TRUE;
   2586 	else
   2587 		resp->cinfo.atomic = FALSE;
   2588 
   2589 	*cs->statusp = resp->status = NFS4_OK;
   2590 out:
   2591 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
   2592 	    LINK4res *, resp);
   2593 }
   2594 
   2595 /*
   2596  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
   2597  */
   2598 
   2599 /* ARGSUSED */
   2600 static nfsstat4
   2601 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
   2602 {
   2603 	int error;
   2604 	int different_export = 0;
   2605 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
   2606 	struct exportinfo *exi = NULL, *pre_exi = NULL;
   2607 	nfsstat4 stat;
   2608 	fid_t fid;
   2609 	int attrdir, dotdot, walk;
   2610 	bool_t is_newvp = FALSE;
   2611 
   2612 	if (cs->vp->v_flag & V_XATTRDIR) {
   2613 		attrdir = 1;
   2614 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
   2615 	} else {
   2616 		attrdir = 0;
   2617 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
   2618 	}
   2619 
   2620 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
   2621 
   2622 	/*
   2623 	 * If dotdotting, then need to check whether it's
   2624 	 * above the root of a filesystem, or above an
   2625 	 * export point.
   2626 	 */
   2627 	if (dotdot) {
   2628 
   2629 		/*
   2630 		 * If dotdotting at the root of a filesystem, then
   2631 		 * need to traverse back to the mounted-on filesystem
   2632 		 * and do the dotdot lookup there.
   2633 		 */
   2634 		if (cs->vp->v_flag & VROOT) {
   2635 
   2636 			/*
   2637 			 * If at the system root, then can
   2638 			 * go up no further.
   2639 			 */
   2640 			if (VN_CMP(cs->vp, rootdir))
   2641 				return (puterrno4(ENOENT));
   2642 
   2643 			/*
   2644 			 * Traverse back to the mounted-on filesystem
   2645 			 */
   2646 			cs->vp = untraverse(cs->vp);
   2647 
   2648 			/*
   2649 			 * Set the different_export flag so we remember
   2650 			 * to pick up a new exportinfo entry for
   2651 			 * this new filesystem.
   2652 			 */
   2653 			different_export = 1;
   2654 		} else {
   2655 
   2656 			/*
   2657 			 * If dotdotting above an export point then set
   2658 			 * the different_export to get new export info.
   2659 			 */
   2660 			different_export = nfs_exported(cs->exi, cs->vp);
   2661 		}
   2662 	}
   2663 
   2664 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
   2665 	    NULL, NULL, NULL);
   2666 	if (error)
   2667 		return (puterrno4(error));
   2668 
   2669 	/*
   2670 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
   2671 	 *
   2672 	 * XXX if the vnode is a symlink and it is not visible in
   2673 	 * a pseudo filesystem, return ENOENT (not following symlink).
   2674 	 * V4 client can not mount such symlink. This is a regression
   2675 	 * from V2/V3.
   2676 	 *
   2677 	 * In the same exported filesystem, if the security flavor used
   2678 	 * is not an explicitly shared flavor, limit the view to the visible
   2679 	 * list entries only. This is not a WRONGSEC case because it's already
   2680 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
   2681 	 */
   2682 	if (!different_export &&
   2683 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
   2684 	    cs->access & CS_ACCESS_LIMITED)) {
   2685 		if (! nfs_visible(cs->exi, vp, &different_export)) {
   2686 			VN_RELE(vp);
   2687 			return (puterrno4(ENOENT));
   2688 		}
   2689 	}
   2690 
   2691 	/*
   2692 	 * If it's a mountpoint, then traverse it.
   2693 	 */
   2694 	if (vn_ismntpt(vp)) {
   2695 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
   2696 		pre_tvp = vp;		/* save pre-traversed vnode	*/
   2697 
   2698 		/*
   2699 		 * hold pre_tvp to counteract rele by traverse.  We will
   2700 		 * need pre_tvp below if checkexport4 fails
   2701 		 */
   2702 		VN_HOLD(pre_tvp);
   2703 		tvp = vp;
   2704 		if ((error = traverse(&tvp)) != 0) {
   2705 			VN_RELE(vp);
   2706 			VN_RELE(pre_tvp);
   2707 			return (puterrno4(error));
   2708 		}
   2709 		vp = tvp;
   2710 		different_export = 1;
   2711 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
   2712 		/*
   2713 		 * The vfsp comparison is to handle the case where
   2714 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
   2715 		 * and NFS is unaware of local fs transistions because
   2716 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
   2717 		 * the dir and the obj returned by lookup will have different
   2718 		 * vfs ptrs.
   2719 		 */
   2720 		different_export = 1;
   2721 	}
   2722 
   2723 	if (different_export) {
   2724 
   2725 		bzero(&fid, sizeof (fid));
   2726 		fid.fid_len = MAXFIDSZ;
   2727 		error = vop_fid_pseudo(vp, &fid);
   2728 		if (error) {
   2729 			VN_RELE(vp);
   2730 			if (pre_tvp)
   2731 				VN_RELE(pre_tvp);
   2732 			return (puterrno4(error));
   2733 		}
   2734 
   2735 		if (dotdot)
   2736 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
   2737 		else
   2738 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
   2739 
   2740 		if (exi == NULL) {
   2741 			if (pre_tvp) {
   2742 				/*
   2743 				 * If this vnode is a mounted-on vnode,
   2744 				 * but the mounted-on file system is not
   2745 				 * exported, send back the filehandle for
   2746 				 * the mounted-on vnode, not the root of
   2747 				 * the mounted-on file system.
   2748 				 */
   2749 				VN_RELE(vp);
   2750 				vp = pre_tvp;
   2751 				exi = pre_exi;
   2752 			} else {
   2753 				VN_RELE(vp);
   2754 				return (puterrno4(EACCES));
   2755 			}
   2756 		} else if (pre_tvp) {
   2757 			/* we're done with pre_tvp now. release extra hold */
   2758 			VN_RELE(pre_tvp);
   2759 		}
   2760 
   2761 		cs->exi = exi;
   2762 
   2763 		/*
   2764 		 * Now we do a checkauth4. The reason is that
   2765 		 * this client/user may not have access to the new
   2766 		 * exported file system, and if he does,
   2767 		 * the client/user may be mapped to a different uid.
   2768 		 *
   2769 		 * We start with a new cr, because the checkauth4 done
   2770 		 * in the PUT*FH operation over wrote the cred's uid,
   2771 		 * gid, etc, and we want the real thing before calling
   2772 		 * checkauth4()
   2773 		 */
   2774 		crfree(cs->cr);
   2775 		cs->cr = crdup(cs->basecr);
   2776 
   2777 		if (cs->vp)
   2778 			oldvp = cs->vp;
   2779 		cs->vp = vp;
   2780 		is_newvp = TRUE;
   2781 
   2782 		stat = call_checkauth4(cs, req);
   2783 		if (stat != NFS4_OK) {
   2784 			VN_RELE(cs->vp);
   2785 			cs->vp = oldvp;
   2786 			return (stat);
   2787 		}
   2788 	}
   2789 
   2790 	/*
   2791 	 * After various NFS checks, do a label check on the path
   2792 	 * component. The label on this path should either be the
   2793 	 * global zone's label or a zone's label. We are only
   2794 	 * interested in the zone's label because exported files
   2795 	 * in global zone is accessible (though read-only) to
   2796 	 * clients. The exportability/visibility check is already
   2797 	 * done before reaching this code.
   2798 	 */
   2799 	if (is_system_labeled()) {
   2800 		bslabel_t *clabel;
   2801 
   2802 		ASSERT(req->rq_label != NULL);
   2803 		clabel = req->rq_label;
   2804 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
   2805 		    "got client label from request(1)", struct svc_req *, req);
   2806 
   2807 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   2808 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
   2809 			    cs->exi)) {
   2810 				error = EACCES;
   2811 				goto err_out;
   2812 			}
   2813 		} else {
   2814 			/*
   2815 			 * We grant access to admin_low label clients
   2816 			 * only if the client is trusted, i.e. also
   2817 			 * running Solaris Trusted Extension.
   2818 			 */
   2819 			struct sockaddr	*ca;
   2820 			int		addr_type;
   2821 			void		*ipaddr;
   2822 			tsol_tpc_t	*tp;
   2823 
   2824 			ca = (struct sockaddr *)svc_getrpccaller(
   2825 			    req->rq_xprt)->buf;
   2826 			if (ca->sa_family == AF_INET) {
   2827 				addr_type = IPV4_VERSION;
   2828 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
   2829 			} else if (ca->sa_family == AF_INET6) {
   2830 				addr_type = IPV6_VERSION;
   2831 				ipaddr = &((struct sockaddr_in6 *)
   2832 				    ca)->sin6_addr;
   2833 			}
   2834 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
   2835 			if (tp == NULL || tp->tpc_tp.tp_doi !=
   2836 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
   2837 			    SUN_CIPSO) {
   2838 				if (tp != NULL)
   2839 					TPC_RELE(tp);
   2840 				error = EACCES;
   2841 				goto err_out;
   2842 			}
   2843 			TPC_RELE(tp);
   2844 		}
   2845 	}
   2846 
   2847 	error = makefh4(&cs->fh, vp, cs->exi);
   2848 
   2849 err_out:
   2850 	if (error) {
   2851 		if (is_newvp) {
   2852 			VN_RELE(cs->vp);
   2853 			cs->vp = oldvp;
   2854 		} else
   2855 			VN_RELE(vp);
   2856 		return (puterrno4(error));
   2857 	}
   2858 
   2859 	if (!is_newvp) {
   2860 		if (cs->vp)
   2861 			VN_RELE(cs->vp);
   2862 		cs->vp = vp;
   2863 	} else if (oldvp)
   2864 		VN_RELE(oldvp);
   2865 
   2866 	/*
   2867 	 * if did lookup on attrdir and didn't lookup .., set named
   2868 	 * attr fh flag
   2869 	 */
   2870 	if (attrdir && ! dotdot)
   2871 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
   2872 
   2873 	/* Assume false for now, open proc will set this */
   2874 	cs->mandlock = FALSE;
   2875 
   2876 	return (NFS4_OK);
   2877 }
   2878 
   2879 /* ARGSUSED */
   2880 static void
   2881 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2882     struct compound_state *cs)
   2883 {
   2884 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
   2885 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
   2886 	char *nm;
   2887 	uint_t len;
   2888 	struct sockaddr *ca;
   2889 	char *name = NULL;
   2890 
   2891 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
   2892 	    LOOKUP4args *, args);
   2893 
   2894 	if (cs->vp == NULL) {
   2895 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2896 		goto out;
   2897 	}
   2898 
   2899 	if (cs->vp->v_type == VLNK) {
   2900 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
   2901 		goto out;
   2902 	}
   2903 
   2904 	if (cs->vp->v_type != VDIR) {
   2905 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2906 		goto out;
   2907 	}
   2908 
   2909 	if (!utf8_dir_verify(&args->objname)) {
   2910 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2911 		goto out;
   2912 	}
   2913 
   2914 	nm = utf8_to_str(&args->objname, &len, NULL);
   2915 	if (nm == NULL) {
   2916 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2917 		goto out;
   2918 	}
   2919 
   2920 	if (len > MAXNAMELEN) {
   2921 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   2922 		kmem_free(nm, len);
   2923 		goto out;
   2924 	}
   2925 
   2926 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   2927 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   2928 	    MAXPATHLEN  + 1);
   2929 
   2930 	if (name == NULL) {
   2931 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   2932 		kmem_free(nm, len);
   2933 		goto out;
   2934 	}
   2935 
   2936 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
   2937 
   2938 	if (name != nm)
   2939 		kmem_free(name, MAXPATHLEN + 1);
   2940 	kmem_free(nm, len);
   2941 
   2942 out:
   2943 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
   2944 	    LOOKUP4res *, resp);
   2945 }
   2946 
   2947 /* ARGSUSED */
   2948 static void
   2949 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   2950     struct compound_state *cs)
   2951 {
   2952 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
   2953 
   2954 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
   2955 
   2956 	if (cs->vp == NULL) {
   2957 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2958 		goto out;
   2959 	}
   2960 
   2961 	if (cs->vp->v_type != VDIR) {
   2962 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   2963 		goto out;
   2964 	}
   2965 
   2966 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
   2967 
   2968 	/*
   2969 	 * From NFSV4 Specification, LOOKUPP should not check for
   2970 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
   2971 	 */
   2972 	if (resp->status == NFS4ERR_WRONGSEC) {
   2973 		*cs->statusp = resp->status = NFS4_OK;
   2974 	}
   2975 
   2976 out:
   2977 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
   2978 	    LOOKUPP4res *, resp);
   2979 }
   2980 
   2981 
   2982 /*ARGSUSED2*/
   2983 static void
   2984 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   2985     struct compound_state *cs)
   2986 {
   2987 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
   2988 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
   2989 	vnode_t		*avp = NULL;
   2990 	int		lookup_flags = LOOKUP_XATTR, error;
   2991 	int		exp_ro = 0;
   2992 
   2993 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
   2994 	    OPENATTR4args *, args);
   2995 
   2996 	if (cs->vp == NULL) {
   2997 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   2998 		goto out;
   2999 	}
   3000 
   3001 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
   3002 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
   3003 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
   3004 		goto out;
   3005 	}
   3006 
   3007 	/*
   3008 	 * If file system supports passing ACE mask to VOP_ACCESS then
   3009 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
   3010 	 */
   3011 
   3012 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
   3013 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
   3014 		    V_ACE_MASK, cs->cr, NULL);
   3015 	else
   3016 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
   3017 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
   3018 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
   3019 
   3020 	if (error) {
   3021 		*cs->statusp = resp->status = puterrno4(EACCES);
   3022 		goto out;
   3023 	}
   3024 
   3025 	/*
   3026 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
   3027 	 * the file system is exported read-only -- regardless of
   3028 	 * createdir flag.  Otherwise the attrdir would be created
   3029 	 * (assuming server fs isn't mounted readonly locally).  If
   3030 	 * VOP_LOOKUP returns ENOENT in this case, the error will
   3031 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
   3032 	 * because specfs has no VOP_LOOKUP op, so the macro would
   3033 	 * return ENOSYS.  EINVAL is returned by all (current)
   3034 	 * Solaris file system implementations when any of their
   3035 	 * restrictions are violated (xattr(dir) can't have xattrdir).
   3036 	 * Returning NOTSUPP is more appropriate in this case
   3037 	 * because the object will never be able to have an attrdir.
   3038 	 */
   3039 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
   3040 		lookup_flags |= CREATE_XATTR_DIR;
   3041 
   3042 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
   3043 	    NULL, NULL, NULL);
   3044 
   3045 	if (error) {
   3046 		if (error == ENOENT && args->createdir && exp_ro)
   3047 			*cs->statusp = resp->status = puterrno4(EROFS);
   3048 		else if (error == EINVAL || error == ENOSYS)
   3049 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
   3050 		else
   3051 			*cs->statusp = resp->status = puterrno4(error);
   3052 		goto out;
   3053 	}
   3054 
   3055 	ASSERT(avp->v_flag & V_XATTRDIR);
   3056 
   3057 	error = makefh4(&cs->fh, avp, cs->exi);
   3058 
   3059 	if (error) {
   3060 		VN_RELE(avp);
   3061 		*cs->statusp = resp->status = puterrno4(error);
   3062 		goto out;
   3063 	}
   3064 
   3065 	VN_RELE(cs->vp);
   3066 	cs->vp = avp;
   3067 
   3068 	/*
   3069 	 * There is no requirement for an attrdir fh flag
   3070 	 * because the attrdir has a vnode flag to distinguish
   3071 	 * it from regular (non-xattr) directories.  The
   3072 	 * FH4_ATTRDIR flag is set for future sanity checks.
   3073 	 */
   3074 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
   3075 	*cs->statusp = resp->status = NFS4_OK;
   3076 
   3077 out:
   3078 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
   3079 	    OPENATTR4res *, resp);
   3080 }
   3081 
   3082 static int
   3083 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
   3084     caller_context_t *ct)
   3085 {
   3086 	int error;
   3087 	int i;
   3088 	clock_t delaytime;
   3089 
   3090 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
   3091 
   3092 	/*
   3093 	 * Don't block on mandatory locks. If this routine returns
   3094 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
   3095 	 */
   3096 	uio->uio_fmode = FNONBLOCK;
   3097 
   3098 	for (i = 0; i < rfs4_maxlock_tries; i++) {
   3099 
   3100 
   3101 		if (direction == FREAD) {
   3102 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
   3103 			error = VOP_READ(vp, uio, ioflag, cred, ct);
   3104 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
   3105 		} else {
   3106 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
   3107 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
   3108 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
   3109 		}
   3110 
   3111 		if (error != EAGAIN)
   3112 			break;
   3113 
   3114 		if (i < rfs4_maxlock_tries - 1) {
   3115 			delay(delaytime);
   3116 			delaytime *= 2;
   3117 		}
   3118 	}
   3119 
   3120 	return (error);
   3121 }
   3122 
   3123 /* ARGSUSED */
   3124 static void
   3125 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3126     struct compound_state *cs)
   3127 {
   3128 	READ4args *args = &argop->nfs_argop4_u.opread;
   3129 	READ4res *resp = &resop->nfs_resop4_u.opread;
   3130 	int error;
   3131 	int verror;
   3132 	vnode_t *vp;
   3133 	struct vattr va;
   3134 	struct iovec iov;
   3135 	struct uio uio;
   3136 	u_offset_t offset;
   3137 	bool_t *deleg = &cs->deleg;
   3138 	nfsstat4 stat;
   3139 	int in_crit = 0;
   3140 	mblk_t *mp = NULL;
   3141 	int alloc_err = 0;
   3142 	int rdma_used = 0;
   3143 	int loaned_buffers;
   3144 	caller_context_t ct;
   3145 	struct uio *uiop;
   3146 
   3147 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
   3148 	    READ4args, args);
   3149 
   3150 	vp = cs->vp;
   3151 	if (vp == NULL) {
   3152 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3153 		goto out;
   3154 	}
   3155 	if (cs->access == CS_ACCESS_DENIED) {
   3156 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3157 		goto out;
   3158 	}
   3159 
   3160 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
   3161 	    deleg, TRUE, &ct)) != NFS4_OK) {
   3162 		*cs->statusp = resp->status = stat;
   3163 		goto out;
   3164 	}
   3165 
   3166 	/*
   3167 	 * Enter the critical region before calling VOP_RWLOCK
   3168 	 * to avoid a deadlock with write requests.
   3169 	 */
   3170 	if (nbl_need_check(vp)) {
   3171 		nbl_start_crit(vp, RW_READER);
   3172 		in_crit = 1;
   3173 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
   3174 		    &ct)) {
   3175 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
   3176 			goto out;
   3177 		}
   3178 	}
   3179 
   3180 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
   3181 	    deleg, TRUE, &ct)) != NFS4_OK) {
   3182 		*cs->statusp = resp->status = stat;
   3183 		goto out;
   3184 	}
   3185 
   3186 	if (args->wlist)
   3187 		rdma_used = 1;
   3188 
   3189 	/* use loaned buffers for TCP */
   3190 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
   3191 
   3192 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
   3193 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3194 
   3195 	/*
   3196 	 * If we can't get the attributes, then we can't do the
   3197 	 * right access checking.  So, we'll fail the request.
   3198 	 */
   3199 	if (verror) {
   3200 		*cs->statusp = resp->status = puterrno4(verror);
   3201 		goto out;
   3202 	}
   3203 
   3204 	if (vp->v_type != VREG) {
   3205 		*cs->statusp = resp->status =
   3206 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
   3207 		goto out;
   3208 	}
   3209 
   3210 	if (crgetuid(cs->cr) != va.va_uid &&
   3211 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
   3212 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
   3213 		*cs->statusp = resp->status = puterrno4(error);
   3214 		goto out;
   3215 	}
   3216 
   3217 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
   3218 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3219 		goto out;
   3220 	}
   3221 
   3222 	offset = args->offset;
   3223 	if (offset >= va.va_size) {
   3224 		*cs->statusp = resp->status = NFS4_OK;
   3225 		resp->eof = TRUE;
   3226 		resp->data_len = 0;
   3227 		resp->data_val = NULL;
   3228 		resp->mblk = NULL;
   3229 		/* RDMA */
   3230 		resp->wlist = args->wlist;
   3231 		resp->wlist_len = resp->data_len;
   3232 		*cs->statusp = resp->status = NFS4_OK;
   3233 		if (resp->wlist)
   3234 			clist_zero_len(resp->wlist);
   3235 		goto out;
   3236 	}
   3237 
   3238 	if (args->count == 0) {
   3239 		*cs->statusp = resp->status = NFS4_OK;
   3240 		resp->eof = FALSE;
   3241 		resp->data_len = 0;
   3242 		resp->data_val = NULL;
   3243 		resp->mblk = NULL;
   3244 		/* RDMA */
   3245 		resp->wlist = args->wlist;
   3246 		resp->wlist_len = resp->data_len;
   3247 		if (resp->wlist)
   3248 			clist_zero_len(resp->wlist);
   3249 		goto out;
   3250 	}
   3251 
   3252 	/*
   3253 	 * Do not allocate memory more than maximum allowed
   3254 	 * transfer size
   3255 	 */
   3256 	if (args->count > rfs4_tsize(req))
   3257 		args->count = rfs4_tsize(req);
   3258 
   3259 	if (loaned_buffers) {
   3260 		uiop = (uio_t *)rfs_setup_xuio(vp);
   3261 		ASSERT(uiop != NULL);
   3262 		uiop->uio_segflg = UIO_SYSSPACE;
   3263 		uiop->uio_loffset = args->offset;
   3264 		uiop->uio_resid = args->count;
   3265 
   3266 		/* Jump to do the read if successful */
   3267 		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
   3268 			/*
   3269 			 * Need to hold the vnode until after VOP_RETZCBUF()
   3270 			 * is called.
   3271 			 */
   3272 			VN_HOLD(vp);
   3273 			goto doio_read;
   3274 		}
   3275 
   3276 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
   3277 		    uiop->uio_loffset, int, uiop->uio_resid);
   3278 
   3279 		uiop->uio_extflg = 0;
   3280 
   3281 		/* failure to setup for zero copy */
   3282 		rfs_free_xuio((void *)uiop);
   3283 		loaned_buffers = 0;
   3284 	}
   3285 
   3286 	/*
   3287 	 * If returning data via RDMA Write, then grab the chunk list. If we
   3288 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
   3289 	 */
   3290 	if (rdma_used) {
   3291 		mp = NULL;
   3292 		(void) rdma_get_wchunk(req, &iov, args->wlist);
   3293 	} else {
   3294 		/*
   3295 		 * mp will contain the data to be sent out in the read reply.
   3296 		 * It will be freed after the reply has been sent. Let's
   3297 		 * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
   3298 		 * the call to xdrmblk_putmblk() never fails. If the first
   3299 		 * alloc of the requested size fails, then decrease the size to
   3300 		 * something more reasonable and wait for the allocation to
   3301 		 * occur.
   3302 		 */
   3303 		mp = allocb(RNDUP(args->count), BPRI_MED);
   3304 		if (mp == NULL) {
   3305 			if (args->count > MAXBSIZE)
   3306 				args->count = MAXBSIZE;
   3307 			mp = allocb_wait(RNDUP(args->count), BPRI_MED,
   3308 			    STR_NOSIG, &alloc_err);
   3309 		}
   3310 		ASSERT(mp != NULL);
   3311 		ASSERT(alloc_err == 0);
   3312 
   3313 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
   3314 		iov.iov_len = args->count;
   3315 	}
   3316 
   3317 	uio.uio_iov = &iov;
   3318 	uio.uio_iovcnt = 1;
   3319 	uio.uio_segflg = UIO_SYSSPACE;
   3320 	uio.uio_extflg = UIO_COPY_CACHED;
   3321 	uio.uio_loffset = args->offset;
   3322 	uio.uio_resid = args->count;
   3323 	uiop = &uio;
   3324 
   3325 doio_read:
   3326 	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
   3327 
   3328 	va.va_mask = AT_SIZE;
   3329 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
   3330 
   3331 	if (error) {
   3332 		if (mp)
   3333 			freemsg(mp);
   3334 		*cs->statusp = resp->status = puterrno4(error);
   3335 		goto out;
   3336 	}
   3337 
   3338 	/* make mblk using zc buffers */
   3339 	if (loaned_buffers) {
   3340 		mp = uio_to_mblk(uiop);
   3341 		ASSERT(mp != NULL);
   3342 	}
   3343 
   3344 	*cs->statusp = resp->status = NFS4_OK;
   3345 
   3346 	ASSERT(uiop->uio_resid >= 0);
   3347 	resp->data_len = args->count - uiop->uio_resid;
   3348 	if (mp) {
   3349 		resp->data_val = (char *)mp->b_datap->db_base;
   3350 		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
   3351 	} else {
   3352 		resp->data_val = (caddr_t)iov.iov_base;
   3353 	}
   3354 
   3355 	resp->mblk = mp;
   3356 
   3357 	if (!verror && offset + resp->data_len == va.va_size)
   3358 		resp->eof = TRUE;
   3359 	else
   3360 		resp->eof = FALSE;
   3361 
   3362 	if (rdma_used) {
   3363 		if (!rdma_setup_read_data4(args, resp)) {
   3364 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   3365 		}
   3366 	} else {
   3367 		resp->wlist = NULL;
   3368 	}
   3369 
   3370 out:
   3371 	if (in_crit)
   3372 		nbl_end_crit(vp);
   3373 
   3374 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
   3375 	    READ4res *, resp);
   3376 }
   3377 
   3378 static void
   3379 rfs4_op_read_free(nfs_resop4 *resop)
   3380 {
   3381 	READ4res	*resp = &resop->nfs_resop4_u.opread;
   3382 
   3383 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
   3384 		freemsg(resp->mblk);
   3385 		resp->mblk = NULL;
   3386 		resp->data_val = NULL;
   3387 		resp->data_len = 0;
   3388 	}
   3389 }
   3390 
   3391 static void
   3392 rfs4_op_readdir_free(nfs_resop4 * resop)
   3393 {
   3394 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
   3395 
   3396 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
   3397 		freeb(resp->mblk);
   3398 		resp->mblk = NULL;
   3399 		resp->data_len = 0;
   3400 	}
   3401 }
   3402 
   3403 
   3404 /* ARGSUSED */
   3405 static void
   3406 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   3407     struct compound_state *cs)
   3408 {
   3409 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
   3410 	int		error;
   3411 	vnode_t		*vp;
   3412 	struct exportinfo *exi, *sav_exi;
   3413 	nfs_fh4_fmt_t	*fh_fmtp;
   3414 
   3415 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
   3416 
   3417 	if (cs->vp) {
   3418 		VN_RELE(cs->vp);
   3419 		cs->vp = NULL;
   3420 	}
   3421 
   3422 	if (cs->cr)
   3423 		crfree(cs->cr);
   3424 
   3425 	cs->cr = crdup(cs->basecr);
   3426 
   3427 	vp = exi_public->exi_vp;
   3428 	if (vp == NULL) {
   3429 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   3430 		goto out;
   3431 	}
   3432 
   3433 	error = makefh4(&cs->fh, vp, exi_public);
   3434 	if (error != 0) {
   3435 		*cs->statusp = resp->status = puterrno4(error);
   3436 		goto out;
   3437 	}
   3438 	sav_exi = cs->exi;
   3439 	if (exi_public == exi_root) {
   3440 		/*
   3441 		 * No filesystem is actually shared public, so we default
   3442 		 * to exi_root. In this case, we must check whether root
   3443 		 * is exported.
   3444 		 */
   3445 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
   3446 
   3447 		/*
   3448 		 * if root filesystem is exported, the exportinfo struct that we
   3449 		 * should use is what checkexport4 returns, because root_exi is
   3450 		 * actually a mostly empty struct.
   3451 		 */
   3452 		exi = checkexport4(&fh_fmtp->fh4_fsid,
   3453 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
   3454 		cs->exi = ((exi != NULL) ? exi : exi_public);
   3455 	} else {
   3456 		/*
   3457 		 * it's a properly shared filesystem
   3458 		 */
   3459 		cs->exi = exi_public;
   3460 	}
   3461 
   3462 	if (is_system_labeled()) {
   3463 		bslabel_t *clabel;
   3464 
   3465 		ASSERT(req->rq_label != NULL);
   3466 		clabel = req->rq_label;
   3467 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
   3468 		    "got client label from request(1)",
   3469 		    struct svc_req *, req);
   3470 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   3471 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
   3472 			    cs->exi)) {
   3473 				*cs->statusp = resp->status =
   3474 				    NFS4ERR_SERVERFAULT;
   3475 				goto out;
   3476 			}
   3477 		}
   3478 	}
   3479 
   3480 	VN_HOLD(vp);
   3481 	cs->vp = vp;
   3482 
   3483 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3484 		VN_RELE(cs->vp);
   3485 		cs->vp = NULL;
   3486 		cs->exi = sav_exi;
   3487 		goto out;
   3488 	}
   3489 
   3490 	*cs->statusp = resp->status = NFS4_OK;
   3491 out:
   3492 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
   3493 	    PUTPUBFH4res *, resp);
   3494 }
   3495 
   3496 /*
   3497  * XXX - issue with put*fh operations. Suppose /export/home is exported.
   3498  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
   3499  * or joe have restrictive search permissions, then we shouldn't let
   3500  * the client get a file handle. This is easy to enforce. However, we
   3501  * don't know what security flavor should be used until we resolve the
   3502  * path name. Another complication is uid mapping. If root is
   3503  * the user, then it will be mapped to the anonymous user by default,
   3504  * but we won't know that till we've resolved the path name. And we won't
   3505  * know what the anonymous user is.
   3506  * Luckily, SECINFO is specified to take a full filename.
   3507  * So what we will have to in rfs4_op_lookup is check that flavor of
   3508  * the target object matches that of the request, and if root was the
   3509  * caller, check for the root= and anon= options, and if necessary,
   3510  * repeat the lookup using the right cred_t. But that's not done yet.
   3511  */
   3512 /* ARGSUSED */
   3513 static void
   3514 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3515     struct compound_state *cs)
   3516 {
   3517 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
   3518 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
   3519 	nfs_fh4_fmt_t *fh_fmtp;
   3520 
   3521 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
   3522 	    PUTFH4args *, args);
   3523 
   3524 	if (cs->vp) {
   3525 		VN_RELE(cs->vp);
   3526 		cs->vp = NULL;
   3527 	}
   3528 
   3529 	if (cs->cr) {
   3530 		crfree(cs->cr);
   3531 		cs->cr = NULL;
   3532 	}
   3533 
   3534 
   3535 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
   3536 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
   3537 		goto out;
   3538 	}
   3539 
   3540 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
   3541 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
   3542 	    NULL);
   3543 
   3544 	if (cs->exi == NULL) {
   3545 		*cs->statusp = resp->status = NFS4ERR_STALE;
   3546 		goto out;
   3547 	}
   3548 
   3549 	cs->cr = crdup(cs->basecr);
   3550 
   3551 	ASSERT(cs->cr != NULL);
   3552 
   3553 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
   3554 		*cs->statusp = resp->status;
   3555 		goto out;
   3556 	}
   3557 
   3558 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3559 		VN_RELE(cs->vp);
   3560 		cs->vp = NULL;
   3561 		goto out;
   3562 	}
   3563 
   3564 	nfs_fh4_copy(&args->object, &cs->fh);
   3565 	*cs->statusp = resp->status = NFS4_OK;
   3566 	cs->deleg = FALSE;
   3567 
   3568 out:
   3569 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
   3570 	    PUTFH4res *, resp);
   3571 }
   3572 
   3573 /* ARGSUSED */
   3574 static void
   3575 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3576     struct compound_state *cs)
   3577 {
   3578 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
   3579 	int error;
   3580 	fid_t fid;
   3581 	struct exportinfo *exi, *sav_exi;
   3582 
   3583 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
   3584 
   3585 	if (cs->vp) {
   3586 		VN_RELE(cs->vp);
   3587 		cs->vp = NULL;
   3588 	}
   3589 
   3590 	if (cs->cr)
   3591 		crfree(cs->cr);
   3592 
   3593 	cs->cr = crdup(cs->basecr);
   3594 
   3595 	/*
   3596 	 * Using rootdir, the system root vnode,
   3597 	 * get its fid.
   3598 	 */
   3599 	bzero(&fid, sizeof (fid));
   3600 	fid.fid_len = MAXFIDSZ;
   3601 	error = vop_fid_pseudo(rootdir, &fid);
   3602 	if (error != 0) {
   3603 		*cs->statusp = resp->status = puterrno4(error);
   3604 		goto out;
   3605 	}
   3606 
   3607 	/*
   3608 	 * Then use the root fsid & fid it to find out if it's exported
   3609 	 *
   3610 	 * If the server root isn't exported directly, then
   3611 	 * it should at least be a pseudo export based on
   3612 	 * one or more exports further down in the server's
   3613 	 * file tree.
   3614 	 */
   3615 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
   3616 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
   3617 		NFS4_DEBUG(rfs4_debug,
   3618 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
   3619 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   3620 		goto out;
   3621 	}
   3622 
   3623 	/*
   3624 	 * Now make a filehandle based on the root
   3625 	 * export and root vnode.
   3626 	 */
   3627 	error = makefh4(&cs->fh, rootdir, exi);
   3628 	if (error != 0) {
   3629 		*cs->statusp = resp->status = puterrno4(error);
   3630 		goto out;
   3631 	}
   3632 
   3633 	sav_exi = cs->exi;
   3634 	cs->exi = exi;
   3635 
   3636 	VN_HOLD(rootdir);
   3637 	cs->vp = rootdir;
   3638 
   3639 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
   3640 		VN_RELE(rootdir);
   3641 		cs->vp = NULL;
   3642 		cs->exi = sav_exi;
   3643 		goto out;
   3644 	}
   3645 
   3646 	*cs->statusp = resp->status = NFS4_OK;
   3647 	cs->deleg = FALSE;
   3648 out:
   3649 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
   3650 	    PUTROOTFH4res *, resp);
   3651 }
   3652 
   3653 /*
   3654  * A directory entry is a valid nfsv4 entry if
   3655  * - it has a non-zero ino
   3656  * - it is not a dot or dotdot name
   3657  * - it is visible in a pseudo export or in a real export that can
   3658  *   only have a limited view.
   3659  */
   3660 static bool_t
   3661 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
   3662     int *expseudo, int check_visible)
   3663 {
   3664 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
   3665 		*expseudo = 0;
   3666 		return (FALSE);
   3667 	}
   3668 
   3669 	if (! check_visible) {
   3670 		*expseudo = 0;
   3671 		return (TRUE);
   3672 	}
   3673 
   3674 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
   3675 }
   3676 
   3677 /*
   3678  * set_rdattr_params sets up the variables used to manage what information
   3679  * to get for each directory entry.
   3680  */
   3681 static nfsstat4
   3682 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
   3683     bitmap4 attrs, bool_t *need_to_lookup)
   3684 {
   3685 	uint_t	va_mask;
   3686 	nfsstat4 status;
   3687 	bitmap4 objbits;
   3688 
   3689 	status = bitmap4_to_attrmask(attrs, sargp);
   3690 	if (status != NFS4_OK) {
   3691 		/*
   3692 		 * could not even figure attr mask
   3693 		 */
   3694 		return (status);
   3695 	}
   3696 	va_mask = sargp->vap->va_mask;
   3697 
   3698 	/*
   3699 	 * dirent's d_ino is always correct value for mounted_on_fileid.
   3700 	 * mntdfid_set is set once here, but mounted_on_fileid is
   3701 	 * set in main dirent processing loop for each dirent.
   3702 	 * The mntdfid_set is a simple optimization that lets the
   3703 	 * server attr code avoid work when caller is readdir.
   3704 	 */
   3705 	sargp->mntdfid_set = TRUE;
   3706 
   3707 	/*
   3708 	 * Lookup entry only if client asked for any of the following:
   3709 	 * a) vattr attrs
   3710 	 * b) vfs attrs
   3711 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
   3712 	 *    other than mounted_on_fileid (which we can take from dirent)
   3713 	 */
   3714 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
   3715 
   3716 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
   3717 		*need_to_lookup = TRUE;
   3718 	else
   3719 		*need_to_lookup = FALSE;
   3720 
   3721 	if (sargp->sbp == NULL)
   3722 		return (NFS4_OK);
   3723 
   3724 	/*
   3725 	 * If filesystem attrs are requested, get them now from the
   3726 	 * directory vp, as most entries will have same filesystem. The only
   3727 	 * exception are mounted over entries but we handle
   3728 	 * those as we go (XXX mounted over detection not yet implemented).
   3729 	 */
   3730 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
   3731 	status = bitmap4_get_sysattrs(sargp);
   3732 	sargp->vap->va_mask = va_mask;
   3733 
   3734 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
   3735 		/*
   3736 		 * Failed to get filesystem attributes.
   3737 		 * Return a rdattr_error for each entry, but don't fail.
   3738 		 * However, don't get any obj-dependent attrs.
   3739 		 */
   3740 		sargp->rdattr_error = status;	/* for rdattr_error */
   3741 		*need_to_lookup = FALSE;
   3742 		/*
   3743 		 * At least get fileid for regular readdir output
   3744 		 */
   3745 		sargp->vap->va_mask &= AT_NODEID;
   3746 		status = NFS4_OK;
   3747 	}
   3748 
   3749 	return (status);
   3750 }
   3751 
   3752 /*
   3753  * readlink: args: CURRENT_FH.
   3754  *	res: status. If success - CURRENT_FH unchanged, return linktext.
   3755  */
   3756 
   3757 /* ARGSUSED */
   3758 static void
   3759 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   3760     struct compound_state *cs)
   3761 {
   3762 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3763 	int error;
   3764 	vnode_t *vp;
   3765 	struct iovec iov;
   3766 	struct vattr va;
   3767 	struct uio uio;
   3768 	char *data;
   3769 	struct sockaddr *ca;
   3770 	char *name = NULL;
   3771 	int is_referral;
   3772 
   3773 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
   3774 
   3775 	/* CURRENT_FH: directory */
   3776 	vp = cs->vp;
   3777 	if (vp == NULL) {
   3778 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   3779 		goto out;
   3780 	}
   3781 
   3782 	if (cs->access == CS_ACCESS_DENIED) {
   3783 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3784 		goto out;
   3785 	}
   3786 
   3787 	/* Is it a referral? */
   3788 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
   3789 
   3790 		is_referral = 1;
   3791 
   3792 	} else {
   3793 
   3794 		is_referral = 0;
   3795 
   3796 		if (vp->v_type == VDIR) {
   3797 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
   3798 			goto out;
   3799 		}
   3800 
   3801 		if (vp->v_type != VLNK) {
   3802 			*cs->statusp = resp->status = NFS4ERR_INVAL;
   3803 			goto out;
   3804 		}
   3805 
   3806 	}
   3807 
   3808 	va.va_mask = AT_MODE;
   3809 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
   3810 	if (error) {
   3811 		*cs->statusp = resp->status = puterrno4(error);
   3812 		goto out;
   3813 	}
   3814 
   3815 	if (MANDLOCK(vp, va.va_mode)) {
   3816 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   3817 		goto out;
   3818 	}
   3819 
   3820 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
   3821 
   3822 	if (is_referral) {
   3823 		char *s;
   3824 		size_t strsz;
   3825 
   3826 		/* Get an artificial symlink based on a referral */
   3827 		s = build_symlink(vp, cs->cr, &strsz);
   3828 		global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
   3829 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
   3830 		    vnode_t *, vp, char *, s);
   3831 		if (s == NULL)
   3832 			error = EINVAL;
   3833 		else {
   3834 			error = 0;
   3835 			(void) strlcpy(data, s, MAXPATHLEN + 1);
   3836 			kmem_free(s, strsz);
   3837 		}
   3838 
   3839 	} else {
   3840 
   3841 		iov.iov_base = data;
   3842 		iov.iov_len = MAXPATHLEN;
   3843 		uio.uio_iov = &iov;
   3844 		uio.uio_iovcnt = 1;
   3845 		uio.uio_segflg = UIO_SYSSPACE;
   3846 		uio.uio_extflg = UIO_COPY_CACHED;
   3847 		uio.uio_loffset = 0;
   3848 		uio.uio_resid = MAXPATHLEN;
   3849 
   3850 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
   3851 
   3852 		if (!error)
   3853 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
   3854 	}
   3855 
   3856 	if (error) {
   3857 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3858 		*cs->statusp = resp->status = puterrno4(error);
   3859 		goto out;
   3860 	}
   3861 
   3862 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   3863 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
   3864 	    MAXPATHLEN  + 1);
   3865 
   3866 	if (name == NULL) {
   3867 		/*
   3868 		 * Even though the conversion failed, we return
   3869 		 * something. We just don't translate it.
   3870 		 */
   3871 		name = data;
   3872 	}
   3873 
   3874 	/*
   3875 	 * treat link name as data
   3876 	 */
   3877 	(void) str_to_utf8(name, &resp->link);
   3878 
   3879 	if (name != data)
   3880 		kmem_free(name, MAXPATHLEN + 1);
   3881 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
   3882 	*cs->statusp = resp->status = NFS4_OK;
   3883 
   3884 out:
   3885 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
   3886 	    READLINK4res *, resp);
   3887 }
   3888 
   3889 static void
   3890 rfs4_op_readlink_free(nfs_resop4 *resop)
   3891 {
   3892 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
   3893 	utf8string *symlink = &resp->link;
   3894 
   3895 	if (symlink->utf8string_val) {
   3896 		UTF8STRING_FREE(*symlink)
   3897 	}
   3898 }
   3899 
   3900 /*
   3901  * release_lockowner:
   3902  *	Release any state associated with the supplied
   3903  *	lockowner. Note if any lo_state is holding locks we will not
   3904  *	rele that lo_state and thus the lockowner will not be destroyed.
   3905  *	A client using lock after the lock owner stateid has been released
   3906  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
   3907  *	to reissue the lock with new_lock_owner set to TRUE.
   3908  *	args: lock_owner
   3909  *	res:  status
   3910  */
   3911 /* ARGSUSED */
   3912 static void
   3913 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
   3914     struct svc_req *req, struct compound_state *cs)
   3915 {
   3916 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
   3917 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
   3918 	rfs4_lockowner_t *lo;
   3919 	rfs4_openowner_t *oo;
   3920 	rfs4_state_t *sp;
   3921 	rfs4_lo_state_t *lsp;
   3922 	rfs4_client_t *cp;
   3923 	bool_t create = FALSE;
   3924 	locklist_t *llist;
   3925 	sysid_t sysid;
   3926 
   3927 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
   3928 	    cs, RELEASE_LOCKOWNER4args *, ap);
   3929 
   3930 	/* Make sure there is a clientid around for this request */
   3931 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
   3932 
   3933 	if (cp == NULL) {
   3934 		*cs->statusp = resp->status =
   3935 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
   3936 		goto out;
   3937 	}
   3938 	rfs4_client_rele(cp);
   3939 
   3940 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
   3941 	if (lo == NULL) {
   3942 		*cs->statusp = resp->status = NFS4_OK;
   3943 		goto out;
   3944 	}
   3945 	ASSERT(lo->rl_client != NULL);
   3946 
   3947 	/*
   3948 	 * Check for EXPIRED client. If so will reap state with in a lease
   3949 	 * period or on next set_clientid_confirm step
   3950 	 */
   3951 	if (rfs4_lease_expired(lo->rl_client)) {
   3952 		rfs4_lockowner_rele(lo);
   3953 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   3954 		goto out;
   3955 	}
   3956 
   3957 	/*
   3958 	 * If no sysid has been assigned, then no locks exist; just return.
   3959 	 */
   3960 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
   3961 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
   3962 		rfs4_lockowner_rele(lo);
   3963 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   3964 		goto out;
   3965 	}
   3966 
   3967 	sysid = lo->rl_client->rc_sysidt;
   3968 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   3969 
   3970 	/*
   3971 	 * Mark the lockowner invalid.
   3972 	 */
   3973 	rfs4_dbe_hide(lo->rl_dbe);
   3974 
   3975 	/*
   3976 	 * sysid-pid pair should now not be used since the lockowner is
   3977 	 * invalid. If the client were to instantiate the lockowner again
   3978 	 * it would be assigned a new pid. Thus we can get the list of
   3979 	 * current locks.
   3980 	 */
   3981 
   3982 	llist = flk_get_active_locks(sysid, lo->rl_pid);
   3983 	/* If we are still holding locks fail */
   3984 	if (llist != NULL) {
   3985 
   3986 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
   3987 
   3988 		flk_free_locklist(llist);
   3989 		/*
   3990 		 * We need to unhide the lockowner so the client can
   3991 		 * try it again. The bad thing here is if the client
   3992 		 * has a logic error that took it here in the first place
   3993 		 * he probably has lost accounting of the locks that it
   3994 		 * is holding. So we may have dangling state until the
   3995 		 * open owner state is reaped via close. One scenario
   3996 		 * that could possibly occur is that the client has
   3997 		 * sent the unlock request(s) in separate threads
   3998 		 * and has not waited for the replies before sending the
   3999 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
   4000 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
   4001 		 * reissuing the request.
   4002 		 */
   4003 		rfs4_dbe_unhide(lo->rl_dbe);
   4004 		rfs4_lockowner_rele(lo);
   4005 		goto out;
   4006 	}
   4007 
   4008 	/*
   4009 	 * For the corresponding client we need to check each open
   4010 	 * owner for any opens that have lockowner state associated
   4011 	 * with this lockowner.
   4012 	 */
   4013 
   4014 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
   4015 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
   4016 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
   4017 
   4018 		rfs4_dbe_lock(oo->ro_dbe);
   4019 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
   4020 		    sp = list_next(&oo->ro_statelist, sp)) {
   4021 
   4022 			rfs4_dbe_lock(sp->rs_dbe);
   4023 			for (lsp = list_head(&sp->rs_lostatelist);
   4024 			    lsp != NULL;
   4025 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
   4026 				if (lsp->rls_locker == lo) {
   4027 					rfs4_dbe_lock(lsp->rls_dbe);
   4028 					rfs4_dbe_invalidate(lsp->rls_dbe);
   4029 					rfs4_dbe_unlock(lsp->rls_dbe);
   4030 				}
   4031 			}
   4032 			rfs4_dbe_unlock(sp->rs_dbe);
   4033 		}
   4034 		rfs4_dbe_unlock(oo->ro_dbe);
   4035 	}
   4036 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
   4037 
   4038 	rfs4_lockowner_rele(lo);
   4039 
   4040 	*cs->statusp = resp->status = NFS4_OK;
   4041 
   4042 out:
   4043 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
   4044 	    cs, RELEASE_LOCKOWNER4res *, resp);
   4045 }
   4046 
   4047 /*
   4048  * short utility function to lookup a file and recall the delegation
   4049  */
   4050 static rfs4_file_t *
   4051 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
   4052     int *lkup_error, cred_t *cr)
   4053 {
   4054 	vnode_t *vp;
   4055 	rfs4_file_t *fp = NULL;
   4056 	bool_t fcreate = FALSE;
   4057 	int error;
   4058 
   4059 	if (vpp)
   4060 		*vpp = NULL;
   4061 
   4062 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
   4063 	    NULL)) == 0) {
   4064 		if (vp->v_type == VREG)
   4065 			fp = rfs4_findfile(vp, NULL, &fcreate);
   4066 		if (vpp)
   4067 			*vpp = vp;
   4068 		else
   4069 			VN_RELE(vp);
   4070 	}
   4071 
   4072 	if (lkup_error)
   4073 		*lkup_error = error;
   4074 
   4075 	return (fp);
   4076 }
   4077 
   4078 /*
   4079  * remove: args: CURRENT_FH: directory; name.
   4080  *	res: status. If success - CURRENT_FH unchanged, return change_info
   4081  *		for directory.
   4082  */
   4083 /* ARGSUSED */
   4084 static void
   4085 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4086     struct compound_state *cs)
   4087 {
   4088 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
   4089 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
   4090 	int error;
   4091 	vnode_t *dvp, *vp;
   4092 	struct vattr bdva, idva, adva;
   4093 	char *nm;
   4094 	uint_t len;
   4095 	rfs4_file_t *fp;
   4096 	int in_crit = 0;
   4097 	bslabel_t *clabel;
   4098 	struct sockaddr *ca;
   4099 	char *name = NULL;
   4100 
   4101 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
   4102 	    REMOVE4args *, args);
   4103 
   4104 	/* CURRENT_FH: directory */
   4105 	dvp = cs->vp;
   4106 	if (dvp == NULL) {
   4107 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4108 		goto out;
   4109 	}
   4110 
   4111 	if (cs->access == CS_ACCESS_DENIED) {
   4112 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4113 		goto out;
   4114 	}
   4115 
   4116 	/*
   4117 	 * If there is an unshared filesystem mounted on this vnode,
   4118 	 * Do not allow to remove anything in this directory.
   4119 	 */
   4120 	if (vn_ismntpt(dvp)) {
   4121 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4122 		goto out;
   4123 	}
   4124 
   4125 	if (dvp->v_type != VDIR) {
   4126 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   4127 		goto out;
   4128 	}
   4129 
   4130 	if (!utf8_dir_verify(&args->target)) {
   4131 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4132 		goto out;
   4133 	}
   4134 
   4135 	/*
   4136 	 * Lookup the file so that we can check if it's a directory
   4137 	 */
   4138 	nm = utf8_to_fn(&args->target, &len, NULL);
   4139 	if (nm == NULL) {
   4140 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4141 		goto out;
   4142 	}
   4143 
   4144 	if (len > MAXNAMELEN) {
   4145 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   4146 		kmem_free(nm, len);
   4147 		goto out;
   4148 	}
   4149 
   4150 	if (rdonly4(cs->exi, cs->vp, req)) {
   4151 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   4152 		kmem_free(nm, len);
   4153 		goto out;
   4154 	}
   4155 
   4156 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   4157 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   4158 	    MAXPATHLEN  + 1);
   4159 
   4160 	if (name == NULL) {
   4161 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4162 		kmem_free(nm, len);
   4163 		goto out;
   4164 	}
   4165 
   4166 	/*
   4167 	 * Lookup the file to determine type and while we are see if
   4168 	 * there is a file struct around and check for delegation.
   4169 	 * We don't need to acquire va_seq before this lookup, if
   4170 	 * it causes an update, cinfo.before will not match, which will
   4171 	 * trigger a cache flush even if atomic is TRUE.
   4172 	 */
   4173 	if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
   4174 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
   4175 		    NULL)) {
   4176 			VN_RELE(vp);
   4177 			rfs4_file_rele(fp);
   4178 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4179 			if (nm != name)
   4180 				kmem_free(name, MAXPATHLEN + 1);
   4181 			kmem_free(nm, len);
   4182 			goto out;
   4183 		}
   4184 	}
   4185 
   4186 	/* Didn't find anything to remove */
   4187 	if (vp == NULL) {
   4188 		*cs->statusp = resp->status = error;
   4189 		if (nm != name)
   4190 			kmem_free(name, MAXPATHLEN + 1);
   4191 		kmem_free(nm, len);
   4192 		goto out;
   4193 	}
   4194 
   4195 	if (nbl_need_check(vp)) {
   4196 		nbl_start_crit(vp, RW_READER);
   4197 		in_crit = 1;
   4198 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
   4199 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4200 			if (nm != name)
   4201 				kmem_free(name, MAXPATHLEN + 1);
   4202 			kmem_free(nm, len);
   4203 			nbl_end_crit(vp);
   4204 			VN_RELE(vp);
   4205 			if (fp) {
   4206 				rfs4_clear_dont_grant(fp);
   4207 				rfs4_file_rele(fp);
   4208 			}
   4209 			goto out;
   4210 		}
   4211 	}
   4212 
   4213 	/* check label before allowing removal */
   4214 	if (is_system_labeled()) {
   4215 		ASSERT(req->rq_label != NULL);
   4216 		clabel = req->rq_label;
   4217 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   4218 		    "got client label from request(1)",
   4219 		    struct svc_req *, req);
   4220 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   4221 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
   4222 			    cs->exi)) {
   4223 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4224 				if (name != nm)
   4225 					kmem_free(name, MAXPATHLEN + 1);
   4226 				kmem_free(nm, len);
   4227 				if (in_crit)
   4228 					nbl_end_crit(vp);
   4229 				VN_RELE(vp);
   4230 				if (fp) {
   4231 					rfs4_clear_dont_grant(fp);
   4232 					rfs4_file_rele(fp);
   4233 				}
   4234 				goto out;
   4235 			}
   4236 		}
   4237 	}
   4238 
   4239 	/* Get dir "before" change value */
   4240 	bdva.va_mask = AT_CTIME|AT_SEQ;
   4241 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
   4242 	if (error) {
   4243 		*cs->statusp = resp->status = puterrno4(error);
   4244 		if (nm != name)
   4245 			kmem_free(name, MAXPATHLEN + 1);
   4246 		kmem_free(nm, len);
   4247 		if (in_crit)
   4248 			nbl_end_crit(vp);
   4249 		VN_RELE(vp);
   4250 		if (fp) {
   4251 			rfs4_clear_dont_grant(fp);
   4252 			rfs4_file_rele(fp);
   4253 		}
   4254 		goto out;
   4255 	}
   4256 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
   4257 
   4258 	/* Actually do the REMOVE operation */
   4259 	if (vp->v_type == VDIR) {
   4260 		/*
   4261 		 * Can't remove a directory that has a mounted-on filesystem.
   4262 		 */
   4263 		if (vn_ismntpt(vp)) {
   4264 			error = EACCES;
   4265 		} else {
   4266 			/*
   4267 			 * System V defines rmdir to return EEXIST,
   4268 			 * not * ENOTEMPTY, if the directory is not
   4269 			 * empty.  A System V NFS server needs to map
   4270 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
   4271 			 * transmit over the wire.
   4272 			 */
   4273 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr,
   4274 			    NULL, 0)) == EEXIST)
   4275 				error = ENOTEMPTY;
   4276 		}
   4277 	} else {
   4278 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
   4279 		    fp != NULL) {
   4280 			struct vattr va;
   4281 			vnode_t *tvp;
   4282 
   4283 			rfs4_dbe_lock(fp->rf_dbe);
   4284 			tvp = fp->rf_vp;
   4285 			if (tvp)
   4286 				VN_HOLD(tvp);
   4287 			rfs4_dbe_unlock(fp->rf_dbe);
   4288 
   4289 			if (tvp) {
   4290 				/*
   4291 				 * This is va_seq safe because we are not
   4292 				 * manipulating dvp.
   4293 				 */
   4294 				va.va_mask = AT_NLINK;
   4295 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
   4296 				    va.va_nlink == 0) {
   4297 					/* Remove state on file remove */
   4298 					if (in_crit) {
   4299 						nbl_end_crit(vp);
   4300 						in_crit = 0;
   4301 					}
   4302 					rfs4_close_all_state(fp);
   4303 				}
   4304 				VN_RELE(tvp);
   4305 			}
   4306 		}
   4307 	}
   4308 
   4309 	if (in_crit)
   4310 		nbl_end_crit(vp);
   4311 	VN_RELE(vp);
   4312 
   4313 	if (fp) {
   4314 		rfs4_clear_dont_grant(fp);
   4315 		rfs4_file_rele(fp);
   4316 	}
   4317 	if (nm != name)
   4318 		kmem_free(name, MAXPATHLEN + 1);
   4319 	kmem_free(nm, len);
   4320 
   4321 	if (error) {
   4322 		*cs->statusp = resp->status = puterrno4(error);
   4323 		goto out;
   4324 	}
   4325 
   4326 	/*
   4327 	 * Get the initial "after" sequence number, if it fails, set to zero
   4328 	 */
   4329 	idva.va_mask = AT_SEQ;
   4330 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
   4331 		idva.va_seq = 0;
   4332 
   4333 	/*
   4334 	 * Force modified data and metadata out to stable storage.
   4335 	 */
   4336 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   4337 
   4338 	/*
   4339 	 * Get "after" change value, if it fails, simply return the
   4340 	 * before value.
   4341 	 */
   4342 	adva.va_mask = AT_CTIME|AT_SEQ;
   4343 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
   4344 		adva.va_ctime = bdva.va_ctime;
   4345 		adva.va_seq = 0;
   4346 	}
   4347 
   4348 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
   4349 
   4350 	/*
   4351 	 * The cinfo.atomic = TRUE only if we have
   4352 	 * non-zero va_seq's, and it has incremented by exactly one
   4353 	 * during the VOP_REMOVE/RMDIR and it didn't change during
   4354 	 * the VOP_FSYNC.
   4355 	 */
   4356 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
   4357 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
   4358 		resp->cinfo.atomic = TRUE;
   4359 	else
   4360 		resp->cinfo.atomic = FALSE;
   4361 
   4362 	*cs->statusp = resp->status = NFS4_OK;
   4363 
   4364 out:
   4365 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
   4366 	    REMOVE4res *, resp);
   4367 }
   4368 
   4369 /*
   4370  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
   4371  *		oldname and newname.
   4372  *	res: status. If success - CURRENT_FH unchanged, return change_info
   4373  *		for both from and target directories.
   4374  */
   4375 /* ARGSUSED */
   4376 static void
   4377 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4378     struct compound_state *cs)
   4379 {
   4380 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
   4381 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
   4382 	int error;
   4383 	vnode_t *odvp;
   4384 	vnode_t *ndvp;
   4385 	vnode_t *srcvp, *targvp;
   4386 	struct vattr obdva, oidva, oadva;
   4387 	struct vattr nbdva, nidva, nadva;
   4388 	char *onm, *nnm;
   4389 	uint_t olen, nlen;
   4390 	rfs4_file_t *fp, *sfp;
   4391 	int in_crit_src, in_crit_targ;
   4392 	int fp_rele_grant_hold, sfp_rele_grant_hold;
   4393 	bslabel_t *clabel;
   4394 	struct sockaddr *ca;
   4395 	char *converted_onm = NULL;
   4396 	char *converted_nnm = NULL;
   4397 
   4398 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
   4399 	    RENAME4args *, args);
   4400 
   4401 	fp = sfp = NULL;
   4402 	srcvp = targvp = NULL;
   4403 	in_crit_src = in_crit_targ = 0;
   4404 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
   4405 
   4406 	/* CURRENT_FH: target directory */
   4407 	ndvp = cs->vp;
   4408 	if (ndvp == NULL) {
   4409 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4410 		goto out;
   4411 	}
   4412 
   4413 	/* SAVED_FH: from directory */
   4414 	odvp = cs->saved_vp;
   4415 	if (odvp == NULL) {
   4416 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4417 		goto out;
   4418 	}
   4419 
   4420 	if (cs->access == CS_ACCESS_DENIED) {
   4421 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4422 		goto out;
   4423 	}
   4424 
   4425 	/*
   4426 	 * If there is an unshared filesystem mounted on this vnode,
   4427 	 * do not allow to rename objects in this directory.
   4428 	 */
   4429 	if (vn_ismntpt(odvp)) {
   4430 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4431 		goto out;
   4432 	}
   4433 
   4434 	/*
   4435 	 * If there is an unshared filesystem mounted on this vnode,
   4436 	 * do not allow to rename to this directory.
   4437 	 */
   4438 	if (vn_ismntpt(ndvp)) {
   4439 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4440 		goto out;
   4441 	}
   4442 
   4443 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
   4444 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
   4445 		goto out;
   4446 	}
   4447 
   4448 	if (cs->saved_exi != cs->exi) {
   4449 		*cs->statusp = resp->status = NFS4ERR_XDEV;
   4450 		goto out;
   4451 	}
   4452 
   4453 	if (!utf8_dir_verify(&args->oldname)) {
   4454 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4455 		goto out;
   4456 	}
   4457 
   4458 	if (!utf8_dir_verify(&args->newname)) {
   4459 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4460 		goto out;
   4461 	}
   4462 
   4463 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
   4464 	if (onm == NULL) {
   4465 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4466 		goto out;
   4467 	}
   4468 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   4469 	nlen = MAXPATHLEN + 1;
   4470 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
   4471 	    nlen);
   4472 
   4473 	if (converted_onm == NULL) {
   4474 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4475 		kmem_free(onm, olen);
   4476 		goto out;
   4477 	}
   4478 
   4479 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   4480 	if (nnm == NULL) {
   4481 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4482 		if (onm != converted_onm)
   4483 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4484 		kmem_free(onm, olen);
   4485 		goto out;
   4486 	}
   4487 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
   4488 	    MAXPATHLEN  + 1);
   4489 
   4490 	if (converted_nnm == NULL) {
   4491 		*cs->statusp = resp->status = NFS4ERR_INVAL;
   4492 		kmem_free(nnm, nlen);
   4493 		nnm = NULL;
   4494 		if (onm != converted_onm)
   4495 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4496 		kmem_free(onm, olen);
   4497 		goto out;
   4498 	}
   4499 
   4500 
   4501 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
   4502 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
   4503 		kmem_free(onm, olen);
   4504 		kmem_free(nnm, nlen);
   4505 		goto out;
   4506 	}
   4507 
   4508 
   4509 	if (rdonly4(cs->exi, cs->vp, req)) {
   4510 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   4511 		if (onm != converted_onm)
   4512 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4513 		kmem_free(onm, olen);
   4514 		if (nnm != converted_nnm)
   4515 			kmem_free(converted_nnm, MAXPATHLEN + 1);
   4516 		kmem_free(nnm, nlen);
   4517 		goto out;
   4518 	}
   4519 
   4520 	/* check label of the target dir */
   4521 	if (is_system_labeled()) {
   4522 		ASSERT(req->rq_label != NULL);
   4523 		clabel = req->rq_label;
   4524 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
   4525 		    "got client label from request(1)",
   4526 		    struct svc_req *, req);
   4527 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   4528 			if (!do_rfs_label_check(clabel, ndvp,
   4529 			    EQUALITY_CHECK, cs->exi)) {
   4530 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   4531 				goto err_out;
   4532 			}
   4533 		}
   4534 	}
   4535 
   4536 	/*
   4537 	 * Is the source a file and have a delegation?
   4538 	 * We don't need to acquire va_seq before these lookups, if
   4539 	 * it causes an update, cinfo.before will not match, which will
   4540 	 * trigger a cache flush even if atomic is TRUE.
   4541 	 */
   4542 	if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
   4543 	    &error, cs->cr)) {
   4544 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
   4545 		    NULL)) {
   4546 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4547 			goto err_out;
   4548 		}
   4549 	}
   4550 
   4551 	if (srcvp == NULL) {
   4552 		*cs->statusp = resp->status = puterrno4(error);
   4553 		if (onm != converted_onm)
   4554 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4555 		kmem_free(onm, olen);
   4556 		if (nnm != converted_nnm)
   4557 			kmem_free(converted_onm, MAXPATHLEN + 1);
   4558 		kmem_free(nnm, nlen);
   4559 		goto out;
   4560 	}
   4561 
   4562 	sfp_rele_grant_hold = 1;
   4563 
   4564 	/* Does the destination exist and a file and have a delegation? */
   4565 	if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
   4566 	    NULL, cs->cr)) {
   4567 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
   4568 		    NULL)) {
   4569 			*cs->statusp = resp->status = NFS4ERR_DELAY;
   4570 			goto err_out;
   4571 		}
   4572 	}
   4573 	fp_rele_grant_hold = 1;
   4574 
   4575 
   4576 	/* Check for NBMAND lock on both source and target */
   4577 	if (nbl_need_check(srcvp)) {
   4578 		nbl_start_crit(srcvp, RW_READER);
   4579 		in_crit_src = 1;
   4580 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
   4581 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4582 			goto err_out;
   4583 		}
   4584 	}
   4585 
   4586 	if (targvp && nbl_need_check(targvp)) {
   4587 		nbl_start_crit(targvp, RW_READER);
   4588 		in_crit_targ = 1;
   4589 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
   4590 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
   4591 			goto err_out;
   4592 		}
   4593 	}
   4594 
   4595 	/* Get source "before" change value */
   4596 	obdva.va_mask = AT_CTIME|AT_SEQ;
   4597 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
   4598 	if (!error) {
   4599 		nbdva.va_mask = AT_CTIME|AT_SEQ;
   4600 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
   4601 	}
   4602 	if (error) {
   4603 		*cs->statusp = resp->status = puterrno4(error);
   4604 		goto err_out;
   4605 	}
   4606 
   4607 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
   4608 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
   4609 
   4610 	if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
   4611 	    cs->cr, NULL, 0)) == 0 && fp != NULL) {
   4612 		struct vattr va;
   4613 		vnode_t *tvp;
   4614 
   4615 		rfs4_dbe_lock(fp->rf_dbe);
   4616 		tvp = fp->rf_vp;
   4617 		if (tvp)
   4618 			VN_HOLD(tvp);
   4619 		rfs4_dbe_unlock(fp->rf_dbe);
   4620 
   4621 		if (tvp) {
   4622 			va.va_mask = AT_NLINK;
   4623 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
   4624 			    va.va_nlink == 0) {
   4625 				/* The file is gone and so should the state */
   4626 				if (in_crit_targ) {
   4627 					nbl_end_crit(targvp);
   4628 					in_crit_targ = 0;
   4629 				}
   4630 				rfs4_close_all_state(fp);
   4631 			}
   4632 			VN_RELE(tvp);
   4633 		}
   4634 	}
   4635 	if (error == 0)
   4636 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
   4637 
   4638 	if (in_crit_src)
   4639 		nbl_end_crit(srcvp);
   4640 	if (srcvp)
   4641 		VN_RELE(srcvp);
   4642 	if (in_crit_targ)
   4643 		nbl_end_crit(targvp);
   4644 	if (targvp)
   4645 		VN_RELE(targvp);
   4646 
   4647 	if (sfp) {
   4648 		rfs4_clear_dont_grant(sfp);
   4649 		rfs4_file_rele(sfp);
   4650 	}
   4651 	if (fp) {
   4652 		rfs4_clear_dont_grant(fp);
   4653 		rfs4_file_rele(fp);
   4654 	}
   4655 
   4656 	if (converted_onm != onm)
   4657 		kmem_free(converted_onm, MAXPATHLEN + 1);
   4658 	kmem_free(onm, olen);
   4659 	if (converted_nnm != nnm)
   4660 		kmem_free(converted_nnm, MAXPATHLEN + 1);
   4661 	kmem_free(nnm, nlen);
   4662 
   4663 	/*
   4664 	 * Get the initial "after" sequence number, if it fails, set to zero
   4665 	 */
   4666 	oidva.va_mask = AT_SEQ;
   4667 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
   4668 		oidva.va_seq = 0;
   4669 
   4670 	nidva.va_mask = AT_SEQ;
   4671 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
   4672 		nidva.va_seq = 0;
   4673 
   4674 	/*
   4675 	 * Force modified data and metadata out to stable storage.
   4676 	 */
   4677 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
   4678 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
   4679 
   4680 	if (error) {
   4681 		*cs->statusp = resp->status = puterrno4(error);
   4682 		goto out;
   4683 	}
   4684 
   4685 	/*
   4686 	 * Get "after" change values, if it fails, simply return the
   4687 	 * before value.
   4688 	 */
   4689 	oadva.va_mask = AT_CTIME|AT_SEQ;
   4690 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
   4691 		oadva.va_ctime = obdva.va_ctime;
   4692 		oadva.va_seq = 0;
   4693 	}
   4694 
   4695 	nadva.va_mask = AT_CTIME|AT_SEQ;
   4696 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
   4697 		nadva.va_ctime = nbdva.va_ctime;
   4698 		nadva.va_seq = 0;
   4699 	}
   4700 
   4701 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
   4702 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
   4703 
   4704 	/*
   4705 	 * The cinfo.atomic = TRUE only if we have
   4706 	 * non-zero va_seq's, and it has incremented by exactly one
   4707 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
   4708 	 */
   4709 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
   4710 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
   4711 		resp->source_cinfo.atomic = TRUE;
   4712 	else
   4713 		resp->source_cinfo.atomic = FALSE;
   4714 
   4715 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
   4716 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
   4717 		resp->target_cinfo.atomic = TRUE;
   4718 	else
   4719 		resp->target_cinfo.atomic = FALSE;
   4720 
   4721 #ifdef	VOLATILE_FH_TEST
   4722 	{
   4723 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
   4724 
   4725 	/*
   4726 	 * Add the renamed file handle to the volatile rename list
   4727 	 */
   4728 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
   4729 		/* file handles may expire on rename */
   4730 		vnode_t *vp;
   4731 
   4732 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
   4733 		/*
   4734 		 * Already know that nnm will be a valid string
   4735 		 */
   4736 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
   4737 		    NULL, NULL, NULL);
   4738 		kmem_free(nnm, nlen);
   4739 		if (!error) {
   4740 			add_volrnm_fh(cs->exi, vp);
   4741 			VN_RELE(vp);
   4742 		}
   4743 	}
   4744 	}
   4745 #endif	/* VOLATILE_FH_TEST */
   4746 
   4747 	*cs->statusp = resp->status = NFS4_OK;
   4748 out:
   4749 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
   4750 	    RENAME4res *, resp);
   4751 	return;
   4752 
   4753 err_out:
   4754 	if (onm != converted_onm)
   4755 		kmem_free(converted_onm, MAXPATHLEN + 1);
   4756 	if (onm != NULL)
   4757 		kmem_free(onm, olen);
   4758 	if (nnm != converted_nnm)
   4759 		kmem_free(converted_nnm, MAXPATHLEN + 1);
   4760 	if (nnm != NULL)
   4761 		kmem_free(nnm, nlen);
   4762 
   4763 	if (in_crit_src) nbl_end_crit(srcvp);
   4764 	if (in_crit_targ) nbl_end_crit(targvp);
   4765 	if (targvp) VN_RELE(targvp);
   4766 	if (srcvp) VN_RELE(srcvp);
   4767 	if (sfp) {
   4768 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
   4769 		rfs4_file_rele(sfp);
   4770 	}
   4771 	if (fp) {
   4772 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
   4773 		rfs4_file_rele(fp);
   4774 	}
   4775 
   4776 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
   4777 	    RENAME4res *, resp);
   4778 }
   4779 
   4780 /* ARGSUSED */
   4781 static void
   4782 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4783     struct compound_state *cs)
   4784 {
   4785 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
   4786 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
   4787 	rfs4_client_t *cp;
   4788 
   4789 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
   4790 	    RENEW4args *, args);
   4791 
   4792 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
   4793 		*cs->statusp = resp->status =
   4794 		    rfs4_check_clientid(&args->clientid, 0);
   4795 		goto out;
   4796 	}
   4797 
   4798 	if (rfs4_lease_expired(cp)) {
   4799 		rfs4_client_rele(cp);
   4800 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   4801 		goto out;
   4802 	}
   4803 
   4804 	rfs4_update_lease(cp);
   4805 
   4806 	mutex_enter(cp->rc_cbinfo.cb_lock);
   4807 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
   4808 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
   4809 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
   4810 	} else {
   4811 		*cs->statusp = resp->status = NFS4_OK;
   4812 	}
   4813 	mutex_exit(cp->rc_cbinfo.cb_lock);
   4814 
   4815 	rfs4_client_rele(cp);
   4816 
   4817 out:
   4818 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
   4819 	    RENEW4res *, resp);
   4820 }
   4821 
   4822 /* ARGSUSED */
   4823 static void
   4824 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
   4825     struct compound_state *cs)
   4826 {
   4827 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
   4828 
   4829 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
   4830 
   4831 	/* No need to check cs->access - we are not accessing any object */
   4832 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
   4833 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
   4834 		goto out;
   4835 	}
   4836 	if (cs->vp != NULL) {
   4837 		VN_RELE(cs->vp);
   4838 	}
   4839 	cs->vp = cs->saved_vp;
   4840 	cs->saved_vp = NULL;
   4841 	cs->exi = cs->saved_exi;
   4842 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
   4843 	*cs->statusp = resp->status = NFS4_OK;
   4844 	cs->deleg = FALSE;
   4845 
   4846 out:
   4847 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
   4848 	    RESTOREFH4res *, resp);
   4849 }
   4850 
   4851 /* ARGSUSED */
   4852 static void
   4853 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   4854     struct compound_state *cs)
   4855 {
   4856 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
   4857 
   4858 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
   4859 
   4860 	/* No need to check cs->access - we are not accessing any object */
   4861 	if (cs->vp == NULL) {
   4862 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   4863 		goto out;
   4864 	}
   4865 	if (cs->saved_vp != NULL) {
   4866 		VN_RELE(cs->saved_vp);
   4867 	}
   4868 	cs->saved_vp = cs->vp;
   4869 	VN_HOLD(cs->saved_vp);
   4870 	cs->saved_exi = cs->exi;
   4871 	/*
   4872 	 * since SAVEFH is fairly rare, don't alloc space for its fh
   4873 	 * unless necessary.
   4874 	 */
   4875 	if (cs->saved_fh.nfs_fh4_val == NULL) {
   4876 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
   4877 	}
   4878 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
   4879 	*cs->statusp = resp->status = NFS4_OK;
   4880 
   4881 out:
   4882 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
   4883 	    SAVEFH4res *, resp);
   4884 }
   4885 
   4886 /*
   4887  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
   4888  * return the bitmap of attrs that were set successfully. It is also
   4889  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
   4890  * always be called only after rfs4_do_set_attrs().
   4891  *
   4892  * Verify that the attributes are same as the expected ones. sargp->vap
   4893  * and sargp->sbp contain the input attributes as translated from fattr4.
   4894  *
   4895  * This function verifies only the attrs that correspond to a vattr or
   4896  * vfsstat struct. That is because of the extra step needed to get the
   4897  * corresponding system structs. Other attributes have already been set or
   4898  * verified by do_rfs4_set_attrs.
   4899  *
   4900  * Return 0 if all attrs match, -1 if some don't, error if error processing.
   4901  */
   4902 static int
   4903 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
   4904     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
   4905 {
   4906 	int error, ret_error = 0;
   4907 	int i, k;
   4908 	uint_t sva_mask = sargp->vap->va_mask;
   4909 	uint_t vbit;
   4910 	union nfs4_attr_u *na;
   4911 	uint8_t *amap;
   4912 	bool_t getsb = ntovp->vfsstat;
   4913 
   4914 	if (sva_mask != 0) {
   4915 		/*
   4916 		 * Okay to overwrite sargp->vap because we verify based
   4917 		 * on the incoming values.
   4918 		 */
   4919 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
   4920 		    sargp->cs->cr, NULL);
   4921 		if (ret_error) {
   4922 			if (resp == NULL)
   4923 				return (ret_error);
   4924 			/*
   4925 			 * Must return bitmap of successful attrs
   4926 			 */
   4927 			sva_mask = 0;	/* to prevent checking vap later */
   4928 		} else {
   4929 			/*
   4930 			 * Some file systems clobber va_mask. it is probably
   4931 			 * wrong of them to do so, nonethless we practice
   4932 			 * defensive coding.
   4933 			 * See bug id 4276830.
   4934 			 */
   4935 			sargp->vap->va_mask = sva_mask;
   4936 		}
   4937 	}
   4938 
   4939 	if (getsb) {
   4940 		/*
   4941 		 * Now get the superblock and loop on the bitmap, as there is
   4942 		 * no simple way of translating from superblock to bitmap4.
   4943 		 */
   4944 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
   4945 		if (ret_error) {
   4946 			if (resp == NULL)
   4947 				goto errout;
   4948 			getsb = FALSE;
   4949 		}
   4950 	}
   4951 
   4952 	/*
   4953 	 * Now loop and verify each attribute which getattr returned
   4954 	 * whether it's the same as the input.
   4955 	 */
   4956 	if (resp == NULL && !getsb && (sva_mask == 0))
   4957 		goto errout;
   4958 
   4959 	na = ntovp->na;
   4960 	amap = ntovp->amap;
   4961 	k = 0;
   4962 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
   4963 		k = *amap;
   4964 		ASSERT(nfs4_ntov_map[k].nval == k);
   4965 		vbit = nfs4_ntov_map[k].vbit;
   4966 
   4967 		/*
   4968 		 * If vattr attribute but VOP_GETATTR failed, or it's
   4969 		 * superblock attribute but VFS_STATVFS failed, skip
   4970 		 */
   4971 		if (vbit) {
   4972 			if ((vbit & sva_mask) == 0)
   4973 				continue;
   4974 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
   4975 			continue;
   4976 		}
   4977 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
   4978 		if (resp != NULL) {
   4979 			if (error)
   4980 				ret_error = -1;	/* not all match */
   4981 			else	/* update response bitmap */
   4982 				*resp |= nfs4_ntov_map[k].fbit;
   4983 			continue;
   4984 		}
   4985 		if (error) {
   4986 			ret_error = -1;	/* not all match */
   4987 			break;
   4988 		}
   4989 	}
   4990 errout:
   4991 	return (ret_error);
   4992 }
   4993 
   4994 /*
   4995  * Decode the attribute to be set/verified. If the attr requires a sys op
   4996  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
   4997  * call the sv_getit function for it, because the sys op hasn't yet been done.
   4998  * Return 0 for success, error code if failed.
   4999  *
   5000  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
   5001  */
   5002 static int
   5003 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
   5004     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
   5005 {
   5006 	int error = 0;
   5007 	bool_t set_later;
   5008 
   5009 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
   5010 
   5011 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
   5012 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
   5013 		/*
   5014 		 * don't verify yet if a vattr or sb dependent attr,
   5015 		 * because we don't have their sys values yet.
   5016 		 * Will be done later.
   5017 		 */
   5018 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
   5019 			/*
   5020 			 * ACLs are a special case, since setting the MODE
   5021 			 * conflicts with setting the ACL.  We delay setting
   5022 			 * the ACL until all other attributes have been set.
   5023 			 * The ACL gets set in do_rfs4_op_setattr().
   5024 			 */
   5025 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
   5026 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
   5027 				    sargp, nap);
   5028 				if (error) {
   5029 					xdr_free(nfs4_ntov_map[k].xfunc,
   5030 					    (caddr_t)nap);
   5031 				}
   5032 			}
   5033 		}
   5034 	} else {
   5035 #ifdef  DEBUG
   5036 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
   5037 		    "decoding attribute %d\n", k);
   5038 #endif
   5039 		error = EINVAL;
   5040 	}
   5041 	if (!error && resp_bval && !set_later) {
   5042 		*resp_bval |= nfs4_ntov_map[k].fbit;
   5043 	}
   5044 
   5045 	return (error);
   5046 }
   5047 
   5048 /*
   5049  * Set vattr based on incoming fattr4 attrs - used by setattr.
   5050  * Set response mask. Ignore any values that are not writable vattr attrs.
   5051  */
   5052 static nfsstat4
   5053 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
   5054     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
   5055     nfs4_attr_cmd_t cmd)
   5056 {
   5057 	int error = 0;
   5058 	int i;
   5059 	char *attrs = fattrp->attrlist4;
   5060 	uint32_t attrslen = fattrp->attrlist4_len;
   5061 	XDR xdr;
   5062 	nfsstat4 status = NFS4_OK;
   5063 	vnode_t *vp = cs->vp;
   5064 	union nfs4_attr_u *na;
   5065 	uint8_t *amap;
   5066 
   5067 #ifndef lint
   5068 	/*
   5069 	 * Make sure that maximum attribute number can be expressed as an
   5070 	 * 8 bit quantity.
   5071 	 */
   5072 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
   5073 #endif
   5074 
   5075 	if (vp == NULL) {
   5076 		if (resp)
   5077 			*resp = 0;
   5078 		return (NFS4ERR_NOFILEHANDLE);
   5079 	}
   5080 	if (cs->access == CS_ACCESS_DENIED) {
   5081 		if (resp)
   5082 			*resp = 0;
   5083 		return (NFS4ERR_ACCESS);
   5084 	}
   5085 
   5086 	sargp->op = cmd;
   5087 	sargp->cs = cs;
   5088 	sargp->flag = 0;	/* may be set later */
   5089 	sargp->vap->va_mask = 0;
   5090 	sargp->rdattr_error = NFS4_OK;
   5091 	sargp->rdattr_error_req = FALSE;
   5092 	/* sargp->sbp is set by the caller */
   5093 
   5094 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
   5095 
   5096 	na = ntovp->na;
   5097 	amap = ntovp->amap;
   5098 
   5099 	/*
   5100 	 * The following loop iterates on the nfs4_ntov_map checking
   5101 	 * if the fbit is set in the requested bitmap.
   5102 	 * If set then we process the arguments using the
   5103 	 * rfs4_fattr4 conversion functions to populate the setattr
   5104 	 * vattr and va_mask. Any settable attrs that are not using vattr
   5105 	 * will be set in this loop.
   5106 	 */
   5107 	for (i = 0; i < nfs4_ntov_map_size; i++) {
   5108 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
   5109 			continue;
   5110 		}
   5111 		/*
   5112 		 * If setattr, must be a writable attr.
   5113 		 * If verify/nverify, must be a readable attr.
   5114 		 */
   5115 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
   5116 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
   5117 			/*
   5118 			 * Client tries to set/verify an
   5119 			 * unsupported attribute, tries to set
   5120 			 * a read only attr or verify a write
   5121 			 * only one - error!
   5122 			 */
   5123 			break;
   5124 		}
   5125 		/*
   5126 		 * Decode the attribute to set/verify
   5127 		 */
   5128 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
   5129 		    &xdr, resp ? resp : NULL, na);
   5130 		if (error)
   5131 			break;
   5132 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
   5133 		na++;
   5134 		(ntovp->attrcnt)++;
   5135 		if (nfs4_ntov_map[i].vfsstat)
   5136 			ntovp->vfsstat = TRUE;
   5137 	}
   5138 
   5139 	if (error != 0)
   5140 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
   5141 		    puterrno4(error));
   5142 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
   5143 	return (status);
   5144 }
   5145 
/*
 * Worker for SETATTR: decode the attributes in fattrp and apply them to
 * the current vnode (cs->vp) using the credential cs->cr.
 *
 *   resp    - out: bitmap of attributes reported as set.  Cleared on
 *             entry and masked with fattrp->attrmask before returning.
 *   fattrp  - the attributes requested by the client.
 *   cs      - compound state; supplies the vnode, cred and deleg flag.
 *   stateid - only examined (via rfs4_check_stateid()) when the request
 *             includes a size change.
 *
 * Order of work: decode/set via do_rfs4_set_attrs(), the ACL+MODE sanity
 * check, the stateid check, the special size handling for regular files
 * (NBMAND conflict check, VOP_SPACE for the owner), VOP_SETATTR for
 * whatever is left in va_mask, and finally the delayed ACL set.
 *
 * Returns NFS4_OK or an NFSv4 error.  On failure after the decode step,
 * rfs4_verify_attr() is used to fill resp with what did get set.  The
 * ntov table is always freed and any NBMAND critical region is always
 * exited at done:.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;	/* set once nbl_start_crit() has been called */
	uint_t saved_mask = 0;	/* va_mask as it was before AT_SIZE handling */
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}
	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* XOR exposes the mode bits that differ from the current mode */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
		if (status != NFS4_OK)
			goto done;
	} else {
		/*
		 * rfs4_check_stateid() was not called with &ct on this
		 * path, so set up the caller context by hand.
		 */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The region to check runs between the old and
			 * the new size, whichever direction the change is.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Caller owns the file: change the size with VOP_SPACE
		 * and take AT_SIZE out of the mask handed to VOP_SETATTR.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	/* Apply whatever attributes remain, unless VOP_SPACE already failed */
	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/* Locate the decoded ACL argument in the ntov table */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
			/* any other error is handled by the block below */
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}
   5375 
   5376 /* ARGSUSED */
   5377 static void
   5378 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5379     struct compound_state *cs)
   5380 {
   5381 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
   5382 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
   5383 	bslabel_t *clabel;
   5384 
   5385 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
   5386 	    SETATTR4args *, args);
   5387 
   5388 	if (cs->vp == NULL) {
   5389 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5390 		goto out;
   5391 	}
   5392 
   5393 	/*
   5394 	 * If there is an unshared filesystem mounted on this vnode,
   5395 	 * do not allow to setattr on this vnode.
   5396 	 */
   5397 	if (vn_ismntpt(cs->vp)) {
   5398 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5399 		goto out;
   5400 	}
   5401 
   5402 	resp->attrsset = 0;
   5403 
   5404 	if (rdonly4(cs->exi, cs->vp, req)) {
   5405 		*cs->statusp = resp->status = NFS4ERR_ROFS;
   5406 		goto out;
   5407 	}
   5408 
   5409 	/* check label before setting attributes */
   5410 	if (is_system_labeled()) {
   5411 		ASSERT(req->rq_label != NULL);
   5412 		clabel = req->rq_label;
   5413 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
   5414 		    "got client label from request(1)",
   5415 		    struct svc_req *, req);
   5416 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   5417 			if (!do_rfs_label_check(clabel, cs->vp,
   5418 			    EQUALITY_CHECK, cs->exi)) {
   5419 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
   5420 				goto out;
   5421 			}
   5422 		}
   5423 	}
   5424 
   5425 	*cs->statusp = resp->status =
   5426 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
   5427 	    &args->stateid);
   5428 
   5429 out:
   5430 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
   5431 	    SETATTR4res *, resp);
   5432 }
   5433 
   5434 /* ARGSUSED */
   5435 static void
   5436 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5437     struct compound_state *cs)
   5438 {
   5439 	/*
   5440 	 * verify and nverify are exactly the same, except that nverify
   5441 	 * succeeds when some argument changed, and verify succeeds when
   5442 	 * when none changed.
   5443 	 */
   5444 
   5445 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
   5446 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
   5447 
   5448 	int error;
   5449 	struct nfs4_svgetit_arg sarg;
   5450 	struct statvfs64 sb;
   5451 	struct nfs4_ntov_table ntov;
   5452 
   5453 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
   5454 	    VERIFY4args *, args);
   5455 
   5456 	if (cs->vp == NULL) {
   5457 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5458 		goto out;
   5459 	}
   5460 
   5461 	sarg.sbp = &sb;
   5462 	sarg.is_referral = B_FALSE;
   5463 	nfs4_ntov_table_init(&ntov);
   5464 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   5465 	    &sarg, &ntov, NFS4ATTR_VERIT);
   5466 	if (resp->status != NFS4_OK) {
   5467 		/*
   5468 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   5469 		 * so could return -1 for "no match".
   5470 		 */
   5471 		if (resp->status == -1)
   5472 			resp->status = NFS4ERR_NOT_SAME;
   5473 		goto done;
   5474 	}
   5475 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   5476 	switch (error) {
   5477 	case 0:
   5478 		resp->status = NFS4_OK;
   5479 		break;
   5480 	case -1:
   5481 		resp->status = NFS4ERR_NOT_SAME;
   5482 		break;
   5483 	default:
   5484 		resp->status = puterrno4(error);
   5485 		break;
   5486 	}
   5487 done:
   5488 	*cs->statusp = resp->status;
   5489 	nfs4_ntov_table_free(&ntov, &sarg);
   5490 out:
   5491 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
   5492 	    VERIFY4res *, resp);
   5493 }
   5494 
   5495 /* ARGSUSED */
   5496 static void
   5497 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
   5498     struct compound_state *cs)
   5499 {
   5500 	/*
   5501 	 * verify and nverify are exactly the same, except that nverify
   5502 	 * succeeds when some argument changed, and verify succeeds when
   5503 	 * when none changed.
   5504 	 */
   5505 
   5506 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
   5507 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
   5508 
   5509 	int error;
   5510 	struct nfs4_svgetit_arg sarg;
   5511 	struct statvfs64 sb;
   5512 	struct nfs4_ntov_table ntov;
   5513 
   5514 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
   5515 	    NVERIFY4args *, args);
   5516 
   5517 	if (cs->vp == NULL) {
   5518 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   5519 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   5520 		    NVERIFY4res *, resp);
   5521 		return;
   5522 	}
   5523 	sarg.sbp = &sb;
   5524 	sarg.is_referral = B_FALSE;
   5525 	nfs4_ntov_table_init(&ntov);
   5526 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
   5527 	    &sarg, &ntov, NFS4ATTR_VERIT);
   5528 	if (resp->status != NFS4_OK) {
   5529 		/*
   5530 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
   5531 		 * so could return -1 for "no match".
   5532 		 */
   5533 		if (resp->status == -1)
   5534 			resp->status = NFS4_OK;
   5535 		goto done;
   5536 	}
   5537 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
   5538 	switch (error) {
   5539 	case 0:
   5540 		resp->status = NFS4ERR_SAME;
   5541 		break;
   5542 	case -1:
   5543 		resp->status = NFS4_OK;
   5544 		break;
   5545 	default:
   5546 		resp->status = puterrno4(error);
   5547 		break;
   5548 	}
   5549 done:
   5550 	*cs->statusp = resp->status;
   5551 	nfs4_ntov_table_free(&ntov, &sarg);
   5552 
   5553 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
   5554 	    NVERIFY4res *, resp);
   5555 }
   5556 
/*
 * Number of iovec entries rfs4_op_write() keeps in its on-stack array.
 * An mblk chain that needs more entries than this gets a kmem_alloc()ed
 * array instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define	MAX_IOVECS	12
   5561 
/*
 * WRITE: write args->data_len bytes at args->offset to the current
 * vnode (cs->vp) using the credential cs->cr.
 *
 * Checks performed before any data is written, in order: a current
 * filehandle exists, cs->access is not denied, the stateid is good for
 * FWRITE, no conflicting NBMAND lock covers the range, attributes can be
 * fetched, the export is writable, the vnode is a regular file, the
 * caller is the owner or passes VOP_ACCESS(VWRITE), and the file is not
 * subject to mandatory locking (MANDLOCK).
 *
 * The data comes from one of three places: an mblk chain (args->mblk),
 * args->rlist, or a flat buffer (args->data_val).
 *
 * The result is stored in resp (status, count, committed, writeverf)
 * and *cs->statusp.  Any NBMAND critical region entered is exited at
 * out:.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
	int error;
	vnode_t *vp;
	struct vattr bva;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* used unless the mblk chain is longer */
	struct iovec *iovp;		/* iov, or a kmem_alloc()ed array */
	int iovcnt;
	int ioflag;
	cred_t *savecred, *cr;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;	/* set once nbl_start_crit() has been called */
	caller_context_t ct;

	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
	    WRITE4args *, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	cr = cs->cr;

	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE,
		    args->offset, args->data_len, 0, &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	bva.va_mask = AT_MODE | AT_UID;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/* The owner skips the VOP_ACCESS check; everyone else must pass it */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, bva.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Zero-length write: report success without doing any I/O */
	if (args->data_len == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->count = 0;
		resp->committed = args->stable;
		resp->writeverf = Write4verf;
		goto out;
	}

	if (args->mblk != NULL) {
		mblk_t *m;
		uint_t bytes, round_len;

		/*
		 * Count the mblks needed to cover the data, rounded up
		 * to a multiple of BYTES_PER_XDR_UNIT.
		 */
		iovcnt = 0;
		bytes = 0;
		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
		for (m = args->mblk;
		    m != NULL && bytes < round_len;
		    m = m->b_cont) {
			iovcnt++;
			bytes += MBLKL(m);
		}
#ifdef DEBUG
		/* should have ended on an mblk boundary */
		if (bytes != round_len) {
			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
			    bytes, round_len, args->data_len);
			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
			    (void *)args->mblk, (void *)m);
			ASSERT(bytes == round_len);
		}
#endif
		/* Use the stack array when it is big enough */
		if (iovcnt <= MAX_IOVECS) {
			iovp = iov;
		} else {
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);
	} else if (args->rlist != NULL) {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->data_len;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data_val;
		iovp->iov_len = args->data_len;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->data_len;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* Clip the transfer so it stops at the limit in uio_llimit */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Translate the requested stability level into an I/O flag */
	if (args->stable == UNSTABLE4)
		ioflag = 0;
	else if (args->stable == FILE_SYNC4)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC4)
		ioflag = FDSYNC;
	else {
		/* Unknown stability level; free any heap iovec array first */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	*cs->statusp = resp->status = NFS4_OK;
	/* Bytes written is the request length minus what is left in the uio */
	resp->count = args->data_len - uio.uio_resid;

	/* Both FSYNC and FDSYNC writes are reported as FILE_SYNC4 */
	if (ioflag == 0)
		resp->committed = UNSTABLE4;
	else
		resp->committed = FILE_SYNC4;

	resp->writeverf = Write4verf;

out:
	if (in_crit)
		nbl_end_crit(vp);

	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
	    WRITE4res *, resp);
}
   5762 
   5763 
/*
 * Prototype for sec_svc_getcred(), called by rfs4_compound() below,
 * which treats a return value of 0 as a bad credential.
 *
 * XXX put in a header file
 */
extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
   5766 
   5767 void
   5768 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
   5769     struct svc_req *req, cred_t *cr, int *rv)
   5770 {
   5771 	uint_t i;
   5772 	struct compound_state cs;
   5773 
   5774 	if (rv != NULL)
   5775 		*rv = 0;
   5776 	rfs4_init_compound_state(&cs);
   5777 	/*
   5778 	 * Form a reply tag by copying over the reqeuest tag.
   5779 	 */
   5780 	resp->tag.utf8string_val =
   5781 	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
   5782 	resp->tag.utf8string_len = args->tag.utf8string_len;
   5783 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
   5784 	    resp->tag.utf8string_len);
   5785 
   5786 	cs.statusp = &resp->status;
   5787 	cs.req = req;
   5788 
   5789 	/*
   5790 	 * XXX for now, minorversion should be zero
   5791 	 */
   5792 	if (args->minorversion != NFS4_MINORVERSION) {
   5793 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
   5794 		    &cs, COMPOUND4args *, args);
   5795 		resp->array_len = 0;
   5796 		resp->array = NULL;
   5797 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
   5798 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
   5799 		    &cs, COMPOUND4res *, resp);
   5800 		return;
   5801 	}
   5802 
   5803 	ASSERT(exi == NULL);
   5804 	ASSERT(cr == NULL);
   5805 
   5806 	cr = crget();
   5807 	ASSERT(cr != NULL);
   5808 
   5809 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
   5810 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
   5811 		    &cs, COMPOUND4args *, args);
   5812 		crfree(cr);
   5813 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
   5814 		    &cs, COMPOUND4res *, resp);
   5815 		svcerr_badcred(req->rq_xprt);
   5816 		if (rv != NULL)
   5817 			*rv = 1;
   5818 		return;
   5819 	}
   5820 	resp->array_len = args->array_len;
   5821 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
   5822 	    KM_SLEEP);
   5823 
   5824 	cs.basecr = cr;
   5825 
   5826 	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
   5827 	    COMPOUND4args *, args);
   5828 
   5829 	/*
   5830 	 * For now, NFS4 compound processing must be protected by
   5831 	 * exported_lock because it can access more than one exportinfo
   5832 	 * per compound and share/unshare can now change multiple
   5833 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
   5834 	 * per proc (excluding public exinfo), and exi_count design
   5835 	 * is sufficient to protect concurrent execution of NFS2/3
   5836 	 * ops along with unexport.  This lock will be removed as
   5837 	 * part of the NFSv4 phase 2 namespace redesign work.
   5838 	 */
   5839 	rw_enter(&exported_lock, RW_READER);
   5840 
   5841 	/*
   5842 	 * If this is the first compound we've seen, we need to start all
   5843 	 * new instances' grace periods.
   5844 	 */
   5845 	if (rfs4_seen_first_compound == 0) {
   5846 		rfs4_grace_start_new();
   5847 		/*
   5848 		 * This must be set after rfs4_grace_start_new(), otherwise
   5849 		 * another thread could proceed past here before the former
   5850 		 * is finished.
   5851 		 */
   5852 		rfs4_seen_first_compound = 1;
   5853 	}
   5854 
   5855 	for (i = 0; i < args->array_len && cs.cont; i++) {
   5856 		nfs_argop4 *argop;
   5857 		nfs_resop4 *resop;
   5858 		uint_t op;
   5859 
   5860 		argop = &args->array[i];
   5861 		resop = &resp->array[i];
   5862 		resop->resop = argop->argop;
   5863 		op = (uint_t)resop->resop;
   5864 
   5865 		if (op < rfsv4disp_cnt) {
   5866 			/*
   5867 			 * Count the individual ops here; NULL and COMPOUND
   5868 			 * are counted in common_dispatch()
   5869 			 */
   5870 			rfsproccnt_v4_ptr[op].value.ui64++;
   5871 
   5872 			NFS4_DEBUG(rfs4_debug > 1,
   5873 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
   5874 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
   5875 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
   5876 			    rfs4_op_string[op], *cs.statusp));
   5877 			if (*cs.statusp != NFS4_OK)
   5878 				cs.cont = FALSE;
   5879 		} else {
   5880 			/*
   5881 			 * This is effectively dead code since XDR code
   5882 			 * will have already returned BADXDR if op doesn't
   5883 			 * decode to legal value.  This only done for a
   5884 			 * day when XDR code doesn't verify v4 opcodes.
   5885 			 */
   5886 			op = OP_ILLEGAL;
   5887 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
   5888 
   5889 			rfs4_op_illegal(argop, resop, req, &cs);
   5890 			cs.cont = FALSE;
   5891 		}
   5892 
   5893 		/*
   5894 		 * If not at last op, and if we are to stop, then
   5895 		 * compact the results array.
   5896 		 */
   5897 		if ((i + 1) < args->array_len && !cs.cont) {
   5898 			nfs_resop4 *new_res = kmem_alloc(
   5899 			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
   5900 			bcopy(resp->array,
   5901 			    new_res, (i+1) * sizeof (nfs_resop4));
   5902 			kmem_free(resp->array,
   5903 			    args->array_len * sizeof (nfs_resop4));
   5904 
   5905 			resp->array_len =  i + 1;
   5906 			resp->array = new_res;
   5907 		}
   5908 	}
   5909 
   5910 	rw_exit(&exported_lock);
   5911 
   5912 	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
   5913 	    COMPOUND4res *, resp);
   5914 
   5915 	if (cs.vp)
   5916 		VN_RELE(cs.vp);
   5917 	if (cs.saved_vp)
   5918 		VN_RELE(cs.saved_vp);
   5919 	if (cs.saved_fh.nfs_fh4_val)
   5920 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
   5921 
   5922 	if (cs.basecr)
   5923 		crfree(cs.basecr);
   5924 	if (cs.cr)
   5925 		crfree(cs.cr);
   5926 	/*
   5927 	 * done with this compound request, free the label
   5928 	 */
   5929 
   5930 	if (req->rq_label != NULL) {
   5931 		kmem_free(req->rq_label, sizeof (bslabel_t));
   5932 		req->rq_label = NULL;
   5933 	}
   5934 }
   5935 
   5936 /*
   5937  * XXX because of what appears to be duplicate calls to rfs4_compound_free
   5938  * XXX zero out the tag and array values. Need to investigate why the
   5939  * XXX calls occur, but at least prevent the panic for now.
   5940  */
   5941 void
   5942 rfs4_compound_free(COMPOUND4res *resp)
   5943 {
   5944 	uint_t i;
   5945 
   5946 	if (resp->tag.utf8string_val) {
   5947 		UTF8STRING_FREE(resp->tag)
   5948 	}
   5949 
   5950 	for (i = 0; i < resp->array_len; i++) {
   5951 		nfs_resop4 *resop;
   5952 		uint_t op;
   5953 
   5954 		resop = &resp->array[i];
   5955 		op = (uint_t)resop->resop;
   5956 		if (op < rfsv4disp_cnt) {
   5957 			(*rfsv4disptab[op].dis_resfree)(resop);
   5958 		}
   5959 	}
   5960 	if (resp->array != NULL) {
   5961 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
   5962 	}
   5963 }
   5964 
   5965 /*
   5966  * Process the value of the compound request rpc flags, as a bit-AND
   5967  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
   5968  */
   5969 void
   5970 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
   5971 {
   5972 	int i;
   5973 	int flag = RPC_ALL;
   5974 
   5975 	for (i = 0; flag && i < args->array_len; i++) {
   5976 		uint_t op;
   5977 
   5978 		op = (uint_t)args->array[i].argop;
   5979 
   5980 		if (op < rfsv4disp_cnt)
   5981 			flag &= rfsv4disptab[op].dis_flags;
   5982 		else
   5983 			flag = 0;
   5984 	}
   5985 	*flagp = flag;
   5986 }
   5987 
   5988 nfsstat4
   5989 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
   5990 {
   5991 	nfsstat4 e;
   5992 
   5993 	rfs4_dbe_lock(cp->rc_dbe);
   5994 
   5995 	if (cp->rc_sysidt != LM_NOSYSID) {
   5996 		*sp = cp->rc_sysidt;
   5997 		e = NFS4_OK;
   5998 
   5999 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
   6000 		*sp = cp->rc_sysidt;
   6001 		e = NFS4_OK;
   6002 
   6003 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
   6004 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
   6005 	} else
   6006 		e = NFS4ERR_DELAY;
   6007 
   6008 	rfs4_dbe_unlock(cp->rc_dbe);
   6009 	return (e);
   6010 }
   6011 
   6012 #if defined(DEBUG) && ! defined(lint)
   6013 static void lock_print(char *str, int operation, struct flock64 *flk)
   6014 {
   6015 	char *op, *type;
   6016 
   6017 	switch (operation) {
   6018 	case F_GETLK: op = "F_GETLK";
   6019 		break;
   6020 	case F_SETLK: op = "F_SETLK";
   6021 		break;
   6022 	case F_SETLK_NBMAND: op = "F_SETLK_NBMAND";
   6023 		break;
   6024 	default: op = "F_UNKNOWN";
   6025 		break;
   6026 	}
   6027 	switch (flk->l_type) {
   6028 	case F_UNLCK: type = "F_UNLCK";
   6029 		break;
   6030 	case F_RDLCK: type = "F_RDLCK";
   6031 		break;
   6032 	case F_WRLCK: type = "F_WRLCK";
   6033 		break;
   6034 	default: type = "F_UNKNOWN";
   6035 		break;
   6036 	}
   6037 
   6038 	ASSERT(flk->l_whence == 0);
   6039 	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
   6040 	    str, op, type, (longlong_t)flk->l_start,
   6041 	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
   6042 }
   6043 
   6044 #define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
   6045 #else
   6046 #define	LOCK_PRINT(d, s, t, f)
   6047 #endif
   6048 
   6049 /*ARGSUSED*/
   6050 static bool_t
   6051 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
   6052 {
   6053 	return (TRUE);
   6054 }
   6055 
   6056 /*
   6057  * Look up the pathname using the vp in cs as the directory vnode.
   6058  * cs->vp will be the vnode for the file on success
   6059  */
   6060 
   6061 static nfsstat4
   6062 rfs4_lookup(component4 *component, struct svc_req *req,
   6063     struct compound_state *cs)
   6064 {
   6065 	char *nm;
   6066 	uint32_t len;
   6067 	nfsstat4 status;
   6068 	struct sockaddr *ca;
   6069 	char *name;
   6070 
   6071 	if (cs->vp == NULL) {
   6072 		return (NFS4ERR_NOFILEHANDLE);
   6073 	}
   6074 	if (cs->vp->v_type != VDIR) {
   6075 		return (NFS4ERR_NOTDIR);
   6076 	}
   6077 
   6078 	if (!utf8_dir_verify(component))
   6079 		return (NFS4ERR_INVAL);
   6080 
   6081 	nm = utf8_to_fn(component, &len, NULL);
   6082 	if (nm == NULL) {
   6083 		return (NFS4ERR_INVAL);
   6084 	}
   6085 
   6086 	if (len > MAXNAMELEN) {
   6087 		kmem_free(nm, len);
   6088 		return (NFS4ERR_NAMETOOLONG);
   6089 	}
   6090 
   6091 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   6092 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   6093 	    MAXPATHLEN + 1);
   6094 
   6095 	if (name == NULL) {
   6096 		kmem_free(nm, len);
   6097 		return (NFS4ERR_INVAL);
   6098 	}
   6099 
   6100 	status = do_rfs4_op_lookup(name, req, cs);
   6101 
   6102 	if (name != nm)
   6103 		kmem_free(name, MAXPATHLEN + 1);
   6104 
   6105 	kmem_free(nm, len);
   6106 
   6107 	return (status);
   6108 }
   6109 
   6110 static nfsstat4
   6111 rfs4_lookupfile(component4 *component, struct svc_req *req,
   6112     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
   6113 {
   6114 	nfsstat4 status;
   6115 	vnode_t *dvp = cs->vp;
   6116 	vattr_t bva, ava, fva;
   6117 	int error;
   6118 
   6119 	/* Get "before" change value */
   6120 	bva.va_mask = AT_CTIME|AT_SEQ;
   6121 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   6122 	if (error)
   6123 		return (puterrno4(error));
   6124 
   6125 	/* rfs4_lookup may VN_RELE directory */
   6126 	VN_HOLD(dvp);
   6127 
   6128 	status = rfs4_lookup(component, req, cs);
   6129 	if (status != NFS4_OK) {
   6130 		VN_RELE(dvp);
   6131 		return (status);
   6132 	}
   6133 
   6134 	/*
   6135 	 * Get "after" change value, if it fails, simply return the
   6136 	 * before value.
   6137 	 */
   6138 	ava.va_mask = AT_CTIME|AT_SEQ;
   6139 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   6140 		ava.va_ctime = bva.va_ctime;
   6141 		ava.va_seq = 0;
   6142 	}
   6143 	VN_RELE(dvp);
   6144 
   6145 	/*
   6146 	 * Validate the file is a file
   6147 	 */
   6148 	fva.va_mask = AT_TYPE|AT_MODE;
   6149 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
   6150 	if (error)
   6151 		return (puterrno4(error));
   6152 
   6153 	if (fva.va_type != VREG) {
   6154 		if (fva.va_type == VDIR)
   6155 			return (NFS4ERR_ISDIR);
   6156 		if (fva.va_type == VLNK)
   6157 			return (NFS4ERR_SYMLINK);
   6158 		return (NFS4ERR_INVAL);
   6159 	}
   6160 
   6161 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
   6162 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   6163 
   6164 	/*
   6165 	 * It is undefined if VOP_LOOKUP will change va_seq, so
   6166 	 * cinfo.atomic = TRUE only if we have
   6167 	 * non-zero va_seq's, and they have not changed.
   6168 	 */
   6169 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
   6170 		cinfo->atomic = TRUE;
   6171 	else
   6172 		cinfo->atomic = FALSE;
   6173 
   6174 	/* Check for mandatory locking */
   6175 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
   6176 	return (check_open_access(access, cs, req));
   6177 }
   6178 
   6179 static nfsstat4
   6180 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
   6181     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
   6182 {
   6183 	int error;
   6184 	nfsstat4 status = NFS4_OK;
   6185 	vattr_t va;
   6186 
   6187 tryagain:
   6188 
   6189 	/*
   6190 	 * The file open mode used is VWRITE.  If the client needs
   6191 	 * some other semantic, then it should do the access checking
   6192 	 * itself.  It would have been nice to have the file open mode
   6193 	 * passed as part of the arguments.
   6194 	 */
   6195 
   6196 	*created = TRUE;
   6197 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
   6198 
   6199 	if (error) {
   6200 		*created = FALSE;
   6201 
   6202 		/*
   6203 		 * If we got something other than file already exists
   6204 		 * then just return this error.  Otherwise, we got
   6205 		 * EEXIST.  If we were doing a GUARDED create, then
   6206 		 * just return this error.  Otherwise, we need to
   6207 		 * make sure that this wasn't a duplicate of an
   6208 		 * exclusive create request.
   6209 		 *
   6210 		 * The assumption is made that a non-exclusive create
   6211 		 * request will never return EEXIST.
   6212 		 */
   6213 
   6214 		if (error != EEXIST || mode == GUARDED4) {
   6215 			status = puterrno4(error);
   6216 			return (status);
   6217 		}
   6218 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
   6219 		    NULL, NULL, NULL);
   6220 
   6221 		if (error) {
   6222 			/*
   6223 			 * We couldn't find the file that we thought that
   6224 			 * we just created.  So, we'll just try creating
   6225 			 * it again.
   6226 			 */
   6227 			if (error == ENOENT)
   6228 				goto tryagain;
   6229 
   6230 			status = puterrno4(error);
   6231 			return (status);
   6232 		}
   6233 
   6234 		if (mode == UNCHECKED4) {
   6235 			/* existing object must be regular file */
   6236 			if ((*vpp)->v_type != VREG) {
   6237 				if ((*vpp)->v_type == VDIR)
   6238 					status = NFS4ERR_ISDIR;
   6239 				else if ((*vpp)->v_type == VLNK)
   6240 					status = NFS4ERR_SYMLINK;
   6241 				else
   6242 					status = NFS4ERR_INVAL;
   6243 				VN_RELE(*vpp);
   6244 				return (status);
   6245 			}
   6246 
   6247 			return (NFS4_OK);
   6248 		}
   6249 
   6250 		/* Check for duplicate request */
   6251 		ASSERT(mtime != 0);
   6252 		va.va_mask = AT_MTIME;
   6253 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
   6254 		if (!error) {
   6255 			/* We found the file */
   6256 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
   6257 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
   6258 				/* but its not our creation */
   6259 				VN_RELE(*vpp);
   6260 				return (NFS4ERR_EXIST);
   6261 			}
   6262 			*created = TRUE; /* retrans of create == created */
   6263 			return (NFS4_OK);
   6264 		}
   6265 		VN_RELE(*vpp);
   6266 		return (NFS4ERR_EXIST);
   6267 	}
   6268 
   6269 	return (NFS4_OK);
   6270 }
   6271 
   6272 static nfsstat4
   6273 check_open_access(uint32_t access, struct compound_state *cs,
   6274     struct svc_req *req)
   6275 {
   6276 	int error;
   6277 	vnode_t *vp;
   6278 	bool_t readonly;
   6279 	cred_t *cr = cs->cr;
   6280 
   6281 	/* For now we don't allow mandatory locking as per V2/V3 */
   6282 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
   6283 		return (NFS4ERR_ACCESS);
   6284 	}
   6285 
   6286 	vp = cs->vp;
   6287 	ASSERT(cr != NULL && vp->v_type == VREG);
   6288 
   6289 	/*
   6290 	 * If the file system is exported read only and we are trying
   6291 	 * to open for write, then return NFS4ERR_ROFS
   6292 	 */
   6293 
   6294 	readonly = rdonly4(cs->exi, cs->vp, req);
   6295 
   6296 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
   6297 		return (NFS4ERR_ROFS);
   6298 
   6299 	if (access & OPEN4_SHARE_ACCESS_READ) {
   6300 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
   6301 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
   6302 			return (NFS4ERR_ACCESS);
   6303 		}
   6304 	}
   6305 
   6306 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
   6307 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
   6308 		if (error)
   6309 			return (NFS4ERR_ACCESS);
   6310 	}
   6311 
   6312 	return (NFS4_OK);
   6313 }
   6314 
   6315 static nfsstat4
   6316 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
   6317     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
   6318 {
   6319 	struct nfs4_svgetit_arg sarg;
   6320 	struct nfs4_ntov_table ntov;
   6321 
   6322 	bool_t ntov_table_init = FALSE;
   6323 	struct statvfs64 sb;
   6324 	nfsstat4 status;
   6325 	vnode_t *vp;
   6326 	vattr_t bva, ava, iva, cva, *vap;
   6327 	vnode_t *dvp;
   6328 	timespec32_t *mtime;
   6329 	char *nm = NULL;
   6330 	uint_t buflen;
   6331 	bool_t created;
   6332 	bool_t setsize = FALSE;
   6333 	len_t reqsize;
   6334 	int error;
   6335 	bool_t trunc;
   6336 	caller_context_t ct;
   6337 	component4 *component;
   6338 	bslabel_t *clabel;
   6339 	struct sockaddr *ca;
   6340 	char *name = NULL;
   6341 
   6342 	sarg.sbp = &sb;
   6343 	sarg.is_referral = B_FALSE;
   6344 
   6345 	dvp = cs->vp;
   6346 
   6347 	/* Check if the file system is read only */
   6348 	if (rdonly4(cs->exi, dvp, req))
   6349 		return (NFS4ERR_ROFS);
   6350 
   6351 	/* check the label of including directory */
   6352 	if (is_system_labeled()) {
   6353 		ASSERT(req->rq_label != NULL);
   6354 		clabel = req->rq_label;
   6355 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
   6356 		    "got client label from request(1)",
   6357 		    struct svc_req *, req);
   6358 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
   6359 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
   6360 			    cs->exi)) {
   6361 				return (NFS4ERR_ACCESS);
   6362 			}
   6363 		}
   6364 	}
   6365 
   6366 	/*
   6367 	 * Get the last component of path name in nm. cs will reference
   6368 	 * the including directory on success.
   6369 	 */
   6370 	component = &args->open_claim4_u.file;
   6371 	if (!utf8_dir_verify(component))
   6372 		return (NFS4ERR_INVAL);
   6373 
   6374 	nm = utf8_to_fn(component, &buflen, NULL);
   6375 
   6376 	if (nm == NULL)
   6377 		return (NFS4ERR_RESOURCE);
   6378 
   6379 	if (buflen > MAXNAMELEN) {
   6380 		kmem_free(nm, buflen);
   6381 		return (NFS4ERR_NAMETOOLONG);
   6382 	}
   6383 
   6384 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
   6385 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
   6386 	if (error) {
   6387 		kmem_free(nm, buflen);
   6388 		return (puterrno4(error));
   6389 	}
   6390 
   6391 	if (bva.va_type != VDIR) {
   6392 		kmem_free(nm, buflen);
   6393 		return (NFS4ERR_NOTDIR);
   6394 	}
   6395 
   6396 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
   6397 
   6398 	switch (args->mode) {
   6399 	case GUARDED4:
   6400 		/*FALLTHROUGH*/
   6401 	case UNCHECKED4:
   6402 		nfs4_ntov_table_init(&ntov);
   6403 		ntov_table_init = TRUE;
   6404 
   6405 		*attrset = 0;
   6406 		status = do_rfs4_set_attrs(attrset,
   6407 		    &args->createhow4_u.createattrs,
   6408 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
   6409 
   6410 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
   6411 		    sarg.vap->va_type != VREG) {
   6412 			if (sarg.vap->va_type == VDIR)
   6413 				status = NFS4ERR_ISDIR;
   6414 			else if (sarg.vap->va_type == VLNK)
   6415 				status = NFS4ERR_SYMLINK;
   6416 			else
   6417 				status = NFS4ERR_INVAL;
   6418 		}
   6419 
   6420 		if (status != NFS4_OK) {
   6421 			kmem_free(nm, buflen);
   6422 			nfs4_ntov_table_free(&ntov, &sarg);
   6423 			*attrset = 0;
   6424 			return (status);
   6425 		}
   6426 
   6427 		vap = sarg.vap;
   6428 		vap->va_type = VREG;
   6429 		vap->va_mask |= AT_TYPE;
   6430 
   6431 		if ((vap->va_mask & AT_MODE) == 0) {
   6432 			vap->va_mask |= AT_MODE;
   6433 			vap->va_mode = (mode_t)0600;
   6434 		}
   6435 
   6436 		if (vap->va_mask & AT_SIZE) {
   6437 
   6438 			/* Disallow create with a non-zero size */
   6439 
   6440 			if ((reqsize = sarg.vap->va_size) != 0) {
   6441 				kmem_free(nm, buflen);
   6442 				nfs4_ntov_table_free(&ntov, &sarg);
   6443 				*attrset = 0;
   6444 				return (NFS4ERR_INVAL);
   6445 			}
   6446 			setsize = TRUE;
   6447 		}
   6448 		break;
   6449 
   6450 	case EXCLUSIVE4:
   6451 		/* prohibit EXCL create of named attributes */
   6452 		if (dvp->v_flag & V_XATTRDIR) {
   6453 			kmem_free(nm, buflen);
   6454 			*attrset = 0;
   6455 			return (NFS4ERR_INVAL);
   6456 		}
   6457 
   6458 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
   6459 		cva.va_type = VREG;
   6460 		/*
   6461 		 * Ensure no time overflows. Assumes underlying
   6462 		 * filesystem supports at least 32 bits.
   6463 		 * Truncate nsec to usec resolution to allow valid
   6464 		 * compares even if the underlying filesystem truncates.
   6465 		 */
   6466 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
   6467 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
   6468 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
   6469 		cva.va_mode = (mode_t)0;
   6470 		vap = &cva;
   6471 
   6472 		/*
   6473 		 * For EXCL create, attrset is set to the server attr
   6474 		 * used to cache the client's verifier.
   6475 		 */
   6476 		*attrset = FATTR4_TIME_MODIFY_MASK;
   6477 		break;
   6478 	}
   6479 
   6480 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
   6481 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
   6482 	    MAXPATHLEN  + 1);
   6483 
   6484 	if (name == NULL) {
   6485 		kmem_free(nm, buflen);
   6486 		return (NFS4ERR_SERVERFAULT);
   6487 	}
   6488 
   6489 	status = create_vnode(dvp, name, vap, args->mode, mtime,
   6490 	    cs->cr, &vp, &created);
   6491 	if (nm != name)
   6492 		kmem_free(name, MAXPATHLEN + 1);
   6493 	kmem_free(nm, buflen);
   6494 
   6495 	if (status != NFS4_OK) {
   6496 		if (ntov_table_init)
   6497 			nfs4_ntov_table_free(&ntov, &sarg);
   6498 		*attrset = 0;
   6499 		return (status);
   6500 	}
   6501 
   6502 	trunc = (setsize && !created);
   6503 
   6504 	if (args->mode != EXCLUSIVE4) {
   6505 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
   6506 
   6507 		/*
   6508 		 * True verification that object was created with correct
   6509 		 * attrs is impossible.  The attrs could have been changed
   6510 		 * immediately after object creation.  If attributes did
   6511 		 * not verify, the only recourse for the server is to
   6512 		 * destroy the object.  Maybe if some attrs (like gid)
   6513 		 * are set incorrectly, the object should be destroyed;
   6514 		 * however, seems bad as a default policy.  Do we really
   6515 		 * want to destroy an object over one of the times not
   6516 		 * verifying correctly?  For these reasons, the server
   6517 		 * currently sets bits in attrset for createattrs
   6518 		 * that were set; however, no verification is done.
   6519 		 *
   6520 		 * vmask_to_nmask accounts for vattr bits set on create
   6521 		 *	[do_rfs4_set_attrs() only sets resp bits for
   6522 		 *	 non-vattr/vfs bits.]
   6523 		 * Mask off any bits we set by default so as not to return
   6524 		 * more attrset bits than were requested in createattrs
   6525 		 */
   6526 		if (created) {
   6527 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
   6528 			*attrset &= createmask;
   6529 		} else {
   6530 			/*
   6531 			 * We did not create the vnode (we tried but it
   6532 			 * already existed).  In this case, the only createattr
   6533 			 * that the spec allows the server to set is size,
   6534 			 * and even then, it can only be set if it is 0.
   6535 			 */
   6536 			*attrset = 0;
   6537 			if (trunc)
   6538 				*attrset = FATTR4_SIZE_MASK;
   6539 		}
   6540 	}
   6541 	if (ntov_table_init)
   6542 		nfs4_ntov_table_free(&ntov, &sarg);
   6543 
   6544 	/*
   6545 	 * Get the initial "after" sequence number, if it fails,
   6546 	 * set to zero, time to before.
   6547 	 */
   6548 	iva.va_mask = AT_CTIME|AT_SEQ;
   6549 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
   6550 		iva.va_seq = 0;
   6551 		iva.va_ctime = bva.va_ctime;
   6552 	}
   6553 
   6554 	/*
   6555 	 * create_vnode attempts to create the file exclusive,
   6556 	 * if it already exists the VOP_CREATE will fail and
   6557 	 * may not increase va_seq. It is atomic if
   6558 	 * we haven't changed the directory, but if it has changed
   6559 	 * we don't know what changed it.
   6560 	 */
   6561 	if (!created) {
   6562 		if (bva.va_seq && iva.va_seq &&
   6563 		    bva.va_seq == iva.va_seq)
   6564 			cinfo->atomic = TRUE;
   6565 		else
   6566 			cinfo->atomic = FALSE;
   6567 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
   6568 	} else {
   6569 		/*
   6570 		 * The entry was created, we need to sync the
   6571 		 * directory metadata.
   6572 		 */
   6573 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
   6574 
   6575 		/*
   6576 		 * Get "after" change value, if it fails, simply return the
   6577 		 * before value.
   6578 		 */
   6579 		ava.va_mask = AT_CTIME|AT_SEQ;
   6580 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
   6581 			ava.va_ctime = bva.va_ctime;
   6582 			ava.va_seq = 0;
   6583 		}
   6584 
   6585 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
   6586 
   6587 		/*
   6588 		 * The cinfo->atomic = TRUE only if we have
   6589 		 * non-zero va_seq's, and it has incremented by exactly one
   6590 		 * during the create_vnode and it didn't
   6591 		 * change during the VOP_FSYNC.
   6592 		 */
   6593 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
   6594 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
   6595 			cinfo->atomic = TRUE;
   6596 		else
   6597 			cinfo->atomic = FALSE;
   6598 	}
   6599 
   6600 	/* Check for mandatory locking and that the size gets set. */
   6601 	cva.va_mask = AT_MODE;
   6602 	if (setsize)
   6603 		cva.va_mask |= AT_SIZE;
   6604 
   6605 	/* Assume the worst */
   6606 	cs->mandlock = TRUE;
   6607 
   6608 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
   6609 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
   6610 
   6611 		/*
   6612 		 * Truncate the file if necessary; this would be
   6613 		 * the case for create over an existing file.
   6614 		 */
   6615 
   6616 		if (trunc) {
   6617 			int in_crit = 0;
   6618 			rfs4_file_t *fp;
   6619 			bool_t create = FALSE;
   6620 
   6621 			/*
   6622 			 * We are writing over an existing file.
   6623 			 * Check to see if we need to recall a delegation.
   6624 			 */
   6625 			rfs4_hold_deleg_policy();
   6626 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
   6627 				if (rfs4_check_delegated_byfp(FWRITE, fp,
   6628 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
   6629 					rfs4_file_rele(fp);
   6630 					rfs4_rele_deleg_policy();
   6631 					VN_RELE(vp);
   6632 					*attrset = 0;
   6633 					return (NFS4ERR_DELAY);
   6634 				}
   6635 				rfs4_file_rele(fp);
   6636 			}
   6637 			rfs4_rele_deleg_policy();
   6638 
   6639 			if (nbl_need_check(vp)) {
   6640 				in_crit = 1;
   6641 
   6642 				ASSERT(reqsize == 0);
   6643 
   6644 				nbl_start_crit(vp, RW_READER);
   6645 				if (nbl_conflict(vp, NBL_WRITE, 0,
   6646 				    cva.va_size, 0, NULL)) {
   6647 					in_crit = 0;
   6648 					nbl_end_crit(vp);
   6649 					VN_RELE(vp);
   6650 					*attrset = 0;
   6651 					return (NFS4ERR_ACCESS);
   6652 				}
   6653 			}
   6654 			ct.cc_sysid = 0;
   6655 			ct.cc_pid = 0;
   6656 			ct.cc_caller_id = nfs4_srv_caller_id;
   6657 			ct.cc_flags = CC_DONTBLOCK;
   6658 
   6659 			cva.va_mask = AT_SIZE;
   6660 			cva.va_size = reqsize;
   6661 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
   6662 			if (in_crit)
   6663 				nbl_end_crit(vp);
   6664 		}
   6665 	}
   6666 
   6667 	error = makefh4(&cs->fh, vp, cs->exi);
   6668 
   6669 	/*
   6670 	 * Force modified data and metadata out to stable storage.
   6671 	 */
   6672 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
   6673 
   6674 	if (error) {
   6675 		VN_RELE(vp);
   6676 		*attrset = 0;
   6677 		return (puterrno4(error));
   6678 	}
   6679 
   6680 	/* if parent dir is attrdir, set namedattr fh flag */
   6681 	if (dvp->v_flag & V_XATTRDIR)
   6682 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
   6683 
   6684 	if (cs->vp)
   6685 		VN_RELE(cs->vp);
   6686 
   6687 	cs->vp = vp;
   6688 
   6689 	/*
   6690 	 * if we did not create the file, we will need to check
   6691 	 * the access bits on the file
   6692 	 */
   6693 
   6694 	if (!created) {
   6695 		if (setsize)
   6696 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
   6697 		status = check_open_access(args->share_access, cs, req);
   6698 		if (status != NFS4_OK)
   6699 			*attrset = 0;
   6700 	}
   6701 	return (status);
   6702 }
   6703 
   6704 /*ARGSUSED*/
   6705 static void
   6706 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
   6707     rfs4_openowner_t *oo, delegreq_t deleg,
   6708     uint32_t access, uint32_t deny,
   6709     OPEN4res *resp, int deleg_cur)
   6710 {
   6711 	/* XXX Currently not using req  */
   6712 	rfs4_state_t *sp;
   6713 	rfs4_file_t *fp;
   6714 	bool_t screate = TRUE;
   6715 	bool_t fcreate = TRUE;
   6716 	uint32_t open_a, share_a;
   6717 	uint32_t open_d, share_d;
   6718 	rfs4_deleg_state_t *dsp;
   6719 	sysid_t sysid;
   6720 	nfsstat4 status;
   6721 	caller_context_t ct;
   6722 	int fflags = 0;
   6723 	int recall = 0;
   6724 	int err;
   6725 	int first_open;
   6726 
   6727 	/* get the file struct and hold a lock on it during initial open */
   6728 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
   6729 	if (fp == NULL) {
   6730 		resp->status = NFS4ERR_RESOURCE;
   6731 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
   6732 		return;
   6733 	}
   6734 
   6735 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
   6736 	if (sp == NULL) {
   6737 		resp->status = NFS4ERR_RESOURCE;
   6738 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
   6739 		/* No need to keep any reference */
   6740 		rw_exit(&fp->rf_file_rwlock);
   6741 		rfs4_file_rele(fp);
   6742 		return;
   6743 	}
   6744 
   6745 	/* try to get the sysid before continuing */
   6746 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
   6747 		resp->status = status;
   6748 		rfs4_file_rele(fp);
   6749 		/* Not a fully formed open; "close" it */
   6750 		if (screate == TRUE)
   6751 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6752 		rfs4_state_rele(sp);
   6753 		return;
   6754 	}
   6755 
   6756 	/* Calculate the fflags for this OPEN. */
   6757 	if (access & OPEN4_SHARE_ACCESS_READ)
   6758 		fflags |= FREAD;
   6759 	if (access & OPEN4_SHARE_ACCESS_WRITE)
   6760 		fflags |= FWRITE;
   6761 
   6762 	rfs4_dbe_lock(sp->rs_dbe);
   6763 
   6764 	/*
   6765 	 * Calculate the new deny and access mode that this open is adding to
   6766 	 * the file for this open owner;
   6767 	 */
   6768 	open_d = (deny & ~sp->rs_open_deny);
   6769 	open_a = (access & ~sp->rs_open_access);
   6770 
   6771 	/*
   6772 	 * Calculate the new share access and share deny modes that this open
   6773 	 * is adding to the file for this open owner;
   6774 	 */
   6775 	share_a = (access & ~sp->rs_share_access);
   6776 	share_d = (deny & ~sp->rs_share_deny);
   6777 
   6778 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
   6779 
   6780 	/*
   6781 	 * Check to see the client has already sent an open for this
   6782 	 * open owner on this file with the same share/deny modes.
   6783 	 * If so, we don't need to check for a conflict and we don't
   6784 	 * need to add another shrlock.  If not, then we need to
   6785 	 * check for conflicts in deny and access before checking for
   6786 	 * conflicts in delegation.  We don't want to recall a
   6787 	 * delegation based on an open that will eventually fail based
   6788 	 * on shares modes.
   6789 	 */
   6790 
   6791 	if (share_a || share_d) {
   6792 		if ((err = rfs4_share(sp, access, deny)) != 0) {
   6793 			rfs4_dbe_unlock(sp->rs_dbe);
   6794 			resp->status = err;
   6795 
   6796 			rfs4_file_rele(fp);
   6797 			/* Not a fully formed open; "close" it */
   6798 			if (screate == TRUE)
   6799 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6800 			rfs4_state_rele(sp);
   6801 			return;
   6802 		}
   6803 	}
   6804 
   6805 	rfs4_dbe_lock(fp->rf_dbe);
   6806 
   6807 	/*
   6808 	 * Check to see if this file is delegated and if so, if a
   6809 	 * recall needs to be done.
   6810 	 */
   6811 	if (rfs4_check_recall(sp, access)) {
   6812 		rfs4_dbe_unlock(fp->rf_dbe);
   6813 		rfs4_dbe_unlock(sp->rs_dbe);
   6814 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
   6815 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
   6816 		rfs4_dbe_lock(sp->rs_dbe);
   6817 
   6818 		/* if state closed while lock was dropped */
   6819 		if (sp->rs_closed) {
   6820 			if (share_a || share_d)
   6821 				(void) rfs4_unshare(sp);
   6822 			rfs4_dbe_unlock(sp->rs_dbe);
   6823 			rfs4_file_rele(fp);
   6824 			/* Not a fully formed open; "close" it */
   6825 			if (screate == TRUE)
   6826 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6827 			rfs4_state_rele(sp);
   6828 			resp->status = NFS4ERR_OLD_STATEID;
   6829 			return;
   6830 		}
   6831 
   6832 		rfs4_dbe_lock(fp->rf_dbe);
   6833 		/* Let's see if the delegation was returned */
   6834 		if (rfs4_check_recall(sp, access)) {
   6835 			rfs4_dbe_unlock(fp->rf_dbe);
   6836 			if (share_a || share_d)
   6837 				(void) rfs4_unshare(sp);
   6838 			rfs4_dbe_unlock(sp->rs_dbe);
   6839 			rfs4_file_rele(fp);
   6840 			rfs4_update_lease(sp->rs_owner->ro_client);
   6841 
   6842 			/* Not a fully formed open; "close" it */
   6843 			if (screate == TRUE)
   6844 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6845 			rfs4_state_rele(sp);
   6846 			resp->status = NFS4ERR_DELAY;
   6847 			return;
   6848 		}
   6849 	}
   6850 	/*
   6851 	 * the share check passed and any delegation conflict has been
   6852 	 * taken care of, now call vop_open.
   6853 	 * if this is the first open then call vop_open with fflags.
   6854 	 * if not, call vn_open_upgrade with just the upgrade flags.
   6855 	 *
   6856 	 * if the file has been opened already, it will have the current
   6857 	 * access mode in the state struct.  if it has no share access, then
   6858 	 * this is a new open.
   6859 	 *
   6860 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
   6861 	 * call VOP_OPEN(), just do the open upgrade.
   6862 	 */
   6863 	if (first_open && !deleg_cur) {
   6864 		ct.cc_sysid = sysid;
   6865 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
   6866 		ct.cc_caller_id = nfs4_srv_caller_id;
   6867 		ct.cc_flags = CC_DONTBLOCK;
   6868 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
   6869 		if (err) {
   6870 			rfs4_dbe_unlock(fp->rf_dbe);
   6871 			if (share_a || share_d)
   6872 				(void) rfs4_unshare(sp);
   6873 			rfs4_dbe_unlock(sp->rs_dbe);
   6874 			rfs4_file_rele(fp);
   6875 
   6876 			/* Not a fully formed open; "close" it */
   6877 			if (screate == TRUE)
   6878 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
   6879 			rfs4_state_rele(sp);
   6880 			/* check if a monitor detected a delegation conflict */
   6881 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
   6882 				resp->status = NFS4ERR_DELAY;
   6883 			else
   6884 				resp->status = NFS4ERR_SERVERFAULT;
   6885 			return;
   6886 		}
   6887 	} else { /* open upgrade */
   6888 		/*
   6889 		 * calculate the fflags for the new mode that is being added
   6890 		 * by this upgrade.
   6891 		 */
   6892 		fflags = 0;
   6893 		if (open_a & OPEN4_SHARE_ACCESS_READ)
   6894 			fflags |= FREAD;
   6895 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
   6896 			fflags |= FWRITE;
   6897 		vn_open_upgrade(cs->vp, fflags);
   6898 	}
   6899 	sp->rs_open_access |= access;
   6900 	sp->rs_open_deny |= deny;
   6901 
   6902 	if (open_d & OPEN4_SHARE_DENY_READ)
   6903 		fp->rf_deny_read++;
   6904 	if (open_d & OPEN4_SHARE_DENY_WRITE)
   6905 		fp->rf_deny_write++;
   6906 	fp->rf_share_deny |= deny;
   6907 
   6908 	if (open_a & OPEN4_SHARE_ACCESS_READ)
   6909 		fp->rf_access_read++;
   6910 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
   6911 		fp->rf_access_write++;
   6912 	fp->rf_share_access |= access;
   6913 
   6914 	/*
   6915 	 * Check for delegation here. if the deleg argument is not
   6916 	 * DELEG_ANY, then this is a reclaim from a client and
   6917 	 * we must honor the delegation requested. If necessary we can
   6918 	 * set the recall flag.
   6919 	 */
   6920 
   6921 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
   6922 
   6923 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
   6924 
   6925 	next_stateid(&sp->rs_stateid);
   6926 
   6927 	resp->stateid = sp->rs_stateid.stateid;
   6928 
   6929 	rfs4_dbe_unlock(fp->rf_dbe);
   6930 	rfs4_dbe_unlock(sp->rs_dbe);
   6931 
   6932 	if (dsp) {
   6933 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
   6934 		rfs4_deleg_state_rele(dsp);
   6935 	}
   6936 
   6937 	rfs4_file_rele(fp);
   6938 	rfs4_state_rele(sp);
   6939 
   6940 	resp->status = NFS4_OK;
   6941 }
   6942 
   6943 /*ARGSUSED*/
   6944 static void
   6945 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
   6946     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6947 {
   6948 	change_info4 *cinfo = &resp->cinfo;
   6949 	bitmap4 *attrset = &resp->attrset;
   6950 
   6951 	if (args->opentype == OPEN4_NOCREATE)
   6952 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
   6953 		    req, cs, args->share_access, cinfo);
   6954 	else {
   6955 		/* inhibit delegation grants during exclusive create */
   6956 
   6957 		if (args->mode == EXCLUSIVE4)
   6958 			rfs4_disable_delegation();
   6959 
   6960 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
   6961 		    oo->ro_client->rc_clientid);
   6962 	}
   6963 
   6964 	if (resp->status == NFS4_OK) {
   6965 
   6966 		/* cs->vp cs->fh now reference the desired file */
   6967 
   6968 		rfs4_do_open(cs, req, oo,
   6969 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
   6970 		    args->share_access, args->share_deny, resp, 0);
   6971 
   6972 		/*
   6973 		 * If rfs4_createfile set attrset, we must
   6974 		 * clear this attrset before the response is copied.
   6975 		 */
   6976 		if (resp->status != NFS4_OK && resp->attrset) {
   6977 			resp->attrset = 0;
   6978 		}
   6979 	}
   6980 	else
   6981 		*cs->statusp = resp->status;
   6982 
   6983 	if (args->mode == EXCLUSIVE4)
   6984 		rfs4_enable_delegation();
   6985 }
   6986 
   6987 /*ARGSUSED*/
   6988 static void
   6989 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
   6990     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   6991 {
   6992 	change_info4 *cinfo = &resp->cinfo;
   6993 	vattr_t va;
   6994 	vtype_t v_type = cs->vp->v_type;
   6995 	int error = 0;
   6996 
   6997 	/* Verify that we have a regular file */
   6998 	if (v_type != VREG) {
   6999 		if (v_type == VDIR)
   7000 			resp->status = NFS4ERR_ISDIR;
   7001 		else if (v_type == VLNK)
   7002 			resp->status = NFS4ERR_SYMLINK;
   7003 		else
   7004 			resp->status = NFS4ERR_INVAL;
   7005 		return;
   7006 	}
   7007 
   7008 	va.va_mask = AT_MODE|AT_UID;
   7009 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
   7010 	if (error) {
   7011 		resp->status = puterrno4(error);
   7012 		return;
   7013 	}
   7014 
   7015 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
   7016 
   7017 	/*
   7018 	 * Check if we have access to the file, Note the the file
   7019 	 * could have originally been open UNCHECKED or GUARDED
   7020 	 * with mode bits that will now fail, but there is nothing
   7021 	 * we can really do about that except in the case that the
   7022 	 * owner of the file is the one requesting the open.
   7023 	 */
   7024 	if (crgetuid(cs->cr) != va.va_uid) {
   7025 		resp->status = check_open_access(args->share_access, cs, req);
   7026 		if (resp->status != NFS4_OK) {
   7027 			return;
   7028 		}
   7029 	}
   7030 
   7031 	/*
   7032 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
   7033 	 */
   7034 	cinfo->before = 0;
   7035 	cinfo->after = 0;
   7036 	cinfo->atomic = FALSE;
   7037 
   7038 	rfs4_do_open(cs, req, oo,
   7039 	    NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
   7040 	    args->share_access, args->share_deny, resp, 0);
   7041 }
   7042 
   7043 static void
   7044 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
   7045     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   7046 {
   7047 	int error;
   7048 	nfsstat4 status;
   7049 	stateid4 stateid =
   7050 	    args->open_claim4_u.delegate_cur_info.delegate_stateid;
   7051 	rfs4_deleg_state_t *dsp;
   7052 
   7053 	/*
   7054 	 * Find the state info from the stateid and confirm that the
   7055 	 * file is delegated.  If the state openowner is the same as
   7056 	 * the supplied openowner we're done. If not, get the file
   7057 	 * info from the found state info. Use that file info to
   7058 	 * create the state for this lock owner. Note solaris doen't
   7059 	 * really need the pathname to find the file. We may want to
   7060 	 * lookup the pathname and make sure that the vp exist and
   7061 	 * matches the vp in the file structure. However it is
   7062 	 * possible that the pathname nolonger exists (local process
   7063 	 * unlinks the file), so this may not be that useful.
   7064 	 */
   7065 
   7066 	status = rfs4_get_deleg_state(&stateid, &dsp);
   7067 	if (status != NFS4_OK) {
   7068 		resp->status = status;
   7069 		return;
   7070 	}
   7071 
   7072 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
   7073 
   7074 	/*
   7075 	 * New lock owner, create state. Since this was probably called
   7076 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
   7077 	 */
   7078 
   7079 	ASSERT(cs->vp != NULL);
   7080 	VN_RELE(cs->vp);
   7081 	VN_HOLD(dsp->rds_finfo->rf_vp);
   7082 	cs->vp = dsp->rds_finfo->rf_vp;
   7083 
   7084 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
   7085 		rfs4_deleg_state_rele(dsp);
   7086 		*cs->statusp = resp->status = puterrno4(error);
   7087 		return;
   7088 	}
   7089 
   7090 	/* Mark progress for delegation returns */
   7091 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
   7092 	rfs4_deleg_state_rele(dsp);
   7093 	rfs4_do_open(cs, req, oo, DELEG_NONE,
   7094 	    args->share_access, args->share_deny, resp, 1);
   7095 }
   7096 
   7097 /*ARGSUSED*/
   7098 static void
   7099 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
   7100     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
   7101 {
   7102 	/*
   7103 	 * Lookup the pathname, it must already exist since this file
   7104 	 * was delegated.
   7105 	 *
   7106 	 * Find the file and state info for this vp and open owner pair.
   7107 	 *	check that they are in fact delegated.
   7108 	 *	check that the state access and deny modes are the same.
   7109 	 *
   7110 	 * Return the delgation possibly seting the recall flag.
   7111 	 */
   7112 	rfs4_file_t *fp;
   7113 	rfs4_state_t *sp;
   7114 	bool_t create = FALSE;
   7115 	bool_t dcreate = FALSE;
   7116 	rfs4_deleg_state_t *dsp;
   7117 	nfsace4 *ace;
   7118 
   7119 	/* Note we ignore oflags */
   7120 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
   7121 	    req, cs, args->share_access, &resp->cinfo);
   7122 
   7123 	if (resp->status != NFS4_OK) {
   7124 		return;
   7125 	}
   7126 
   7127 	/* get the file struct and hold a lock on it during initial open */
   7128 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
   7129 	if (fp == NULL) {
   7130 		resp->status = NFS4ERR_RESOURCE;
   7131 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
   7132 		return;
   7133 	}
   7134 
   7135 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
   7136 	if (sp == NULL) {
   7137 		resp->status = NFS4ERR_SERVERFAULT;
   7138 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
   7139 		rw_exit(&fp->rf_file_rwlock);
   7140 		rfs4_file_rele(fp);
   7141 		return;
   7142 	}
   7143 
   7144 	rfs4_dbe_lock(sp->rs_dbe);
   7145 	rfs4_dbe_lock(fp->rf_dbe);
   7146 	if (args->share_access != sp->rs_share_access ||
   7147 	    args->share_deny != sp->rs_share_deny ||
   7148 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
   7149 		NFS4_DEBUG(rfs4_debug,
   7150 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
   7151 		rfs4_dbe_unlock(fp->rf_dbe);
   7152 		rfs4_dbe_unlock(sp->rs_dbe);
   7153 		rfs4_file_rele(fp);
   7154 		rfs4_state_rele(sp);
   7155 		resp->status = NFS4ERR_SERVERFAULT;
   7156 		return;
   7157 	}
   7158 	rfs4_dbe_unlock(fp->rf_dbe);
   7159 	rfs4_dbe_unlock(sp->rs_dbe);
   7160 
   7161 	dsp = rfs4_finddeleg(sp, &dcreate);
   7162 	if (dsp == NULL) {
   7163 		rfs4_state_rele(sp);
   7164 		rfs4_file_rele(fp);
   7165 		resp->status = NFS4ERR_SERVERFAULT;
   7166 		return;
   7167 	}
   7168 
   7169 	next_stateid(&sp->rs_stateid);
   7170 
   7171 	resp->stateid = sp->rs_stateid.stateid;
   7172 
   7173 	resp->delegation.delegation_type = dsp->rds_dtype;
   7174 
   7175 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
   7176 		open_read_delegation4 *rv =
   7177 		    &resp->delegation.open_delegation4_u.read;
   7178 
   7179 		rv->stateid = dsp->rds_delegid.stateid;
   7180 		rv->recall = FALSE; /* no policy in place to set to TRUE */
   7181 		ace = &rv->permissions;
   7182 	} else {
   7183 		open_write_delegation4 *rv =
   7184 		    &resp->delegation.open_delegation4_u.write;
   7185 
   7186 		rv->stateid = dsp->rds_delegid.stateid;
   7187 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
   7188 		ace = &rv->permissions;
   7189 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
   7190 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
   7191 	}
   7192 
   7193 	/* XXX For now */
   7194 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
   7195 	ace->flag = 0;
   7196 	ace->access_mask = 0;
   7197 	ace->who.utf8string_len = 0;
   7198 	ace->who.utf8string_val = 0;
   7199 
   7200 	rfs4_deleg_state_rele(dsp);
   7201 	rfs4_state_rele(sp);
   7202 	rfs4_file_rele(fp);
   7203 }
   7204 
   7205 typedef enum {
   7206 	NFS4_CHKSEQ_OKAY = 0,
   7207 	NFS4_CHKSEQ_REPLAY = 1,
   7208 	NFS4_CHKSEQ_BAD = 2
   7209 } rfs4_chkseq_t;
   7210 
   7211 /*
   7212  * Generic function for sequence number checks.
   7213  */
   7214 static rfs4_chkseq_t
   7215 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
   7216     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
   7217 {
   7218 	/* Same sequence ids and matching operations? */
   7219 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
   7220 		if (copyres == TRUE) {
   7221 			rfs4_free_reply(resop);
   7222 			rfs4_copy_reply(resop, lastop);
   7223 		}
   7224 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
   7225 		    "Replayed SEQID %d\n", seqid));
   7226 		return (NFS4_CHKSEQ_REPLAY);
   7227 	}
   7228 
   7229 	/* If the incoming sequence is not the next expected then it is bad */
   7230 	if (rqst_seq != seqid + 1) {
   7231 		if (rqst_seq == seqid) {
   7232 			NFS4_DEBUG(rfs4_debug,
   7233 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
   7234 			    "but last op was %d current op is %d\n",
   7235 			    lastop->resop, resop->resop));
   7236 			return (NFS4_CHKSEQ_BAD);
   7237 		}
   7238 		NFS4_DEBUG(rfs4_debug,
   7239 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
   7240 		    rqst_seq, seqid));
   7241 		return (NFS4_CHKSEQ_BAD);
   7242 	}
   7243 
   7244 	/* Everything okay -- next expected */
   7245 	return (NFS4_CHKSEQ_OKAY);
   7246 }
   7247 
   7248 
   7249 static rfs4_chkseq_t
   7250 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
   7251 {
   7252 	rfs4_chkseq_t rc;
   7253 
   7254 	rfs4_dbe_lock(op->ro_dbe);
   7255 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
   7256 	    TRUE);
   7257 	rfs4_dbe_unlock(op->ro_dbe);
   7258 
   7259 	if (rc == NFS4_CHKSEQ_OKAY)
   7260 		rfs4_update_lease(op->ro_client);
   7261 
   7262 	return (rc);
   7263 }
   7264 
   7265 static rfs4_chkseq_t
   7266 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
   7267 {
   7268 	rfs4_chkseq_t rc;
   7269 
   7270 	rfs4_dbe_lock(op->ro_dbe);
   7271 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
   7272 	    olo_seqid, resop, FALSE);
   7273 	rfs4_dbe_unlock(op->ro_dbe);
   7274 
   7275 	return (rc);
   7276 }
   7277 
   7278 static rfs4_chkseq_t
   7279 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
   7280 {
   7281 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
   7282 
   7283 	rfs4_dbe_lock(lsp->rls_dbe);
   7284 	if (!lsp->rls_skip_seqid_check)
   7285 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
   7286 		    resop, TRUE);
   7287 	rfs4_dbe_unlock(lsp->rls_dbe);
   7288 
   7289 	return (rc);
   7290 }
   7291 
/*
 * OPEN operation.
 *
 * Outline of the steps taken, in order:
 *   1. require a current filehandle (cs->vp);
 *   2. find the client named by the open owner's clientid and reject an
 *      expired lease;
 *   3. find (or create) the open owner and take its sequence "sw" lock;
 *   4. for a pre-existing owner that is not in postponed-confirm mode,
 *      check the request seqid (bad / replay / okay);
 *   5. apply the grace-period rules for the claim type;
 *   6. validate share_access / share_deny and the mount-point restriction;
 *   7. dispatch on the claim type to one of the rfs4_do_open*() helpers;
 *   8. at "out", decide from resp->status whether the owner's sequence id
 *      is advanced and the reply cached, and for a replay restore the
 *      current filehandle/vnode from the cached reply.
 *
 * The final status is returned in both resp->status and *cs->statusp.
 *
 * Labels:
 *   retry	re-run the open owner lookup after dropping the old owner
 *   out	release the client hold, then do seqid/reply bookkeeping
 *   finish	publish the status, drop the sw lock and the owner hold
 *   end	fire the "done" DTrace probe (no holds remain at this point)
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/*
		 * NOTE(review): there is no rfs4_client_rele() on this
		 * path; presumably rfs4_client_close() consumes the hold
		 * taken by rfs4_findclient_by_id() -- confirm.
		 */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/*
	 * create is passed by reference and must be reset on every pass;
	 * below, "!create" is used to mean the open owner already existed.
	 */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * An unconfirmed owner seen with a larger seqid:
			 * free its opens, drop it and look it up again.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/*
			 * rfs4_check_open_seqid() has already copied the
			 * cached reply into resop, so resp->status below
			 * is the status of the original reply.
			 */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper leaves its result in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* From here on the client is reached through oo->ro_client */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/*
			 * On failure nfs4_fhtovp() is handed &resp->status
			 * to report the reason, and cs->vp is left NULL.
			 */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
   7574 
   7575 /*ARGSUSED*/
   7576 void
   7577 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
   7578     struct svc_req *req, struct compound_state *cs)
   7579 {
   7580 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
   7581 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
   7582 	rfs4_state_t *sp;
   7583 	nfsstat4 status;
   7584 
   7585 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
   7586 	    OPEN_CONFIRM4args *, args);
   7587 
   7588 	if (cs->vp == NULL) {
   7589 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
   7590 		goto out;
   7591 	}
   7592 
   7593 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
   7594 	if (status != NFS4_OK) {
   7595 		*cs->statusp = resp->status = status;
   7596 		goto out;
   7597 	}
   7598 
   7599 	/* Ensure specified filehandle matches */
   7600 	if (cs->vp != sp->rs_finfo->rf_vp) {
   7601 		rfs4_state_rele(sp);
   7602 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7603 		goto out;
   7604 	}
   7605 
   7606 	/* hold off other access to open_owner while we tinker */
   7607 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
   7608 
   7609 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
   7610 	case NFS4_CHECK_STATEID_OKAY:
   7611 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7612 		    resop) != 0) {
   7613 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7614 			break;
   7615 		}
   7616 		/*
   7617 		 * If it is the appropriate stateid and determined to
   7618 		 * be "OKAY" then this means that the stateid does not
   7619 		 * need to be confirmed and the client is in error for
   7620 		 * sending an OPEN_CONFIRM.
   7621 		 */
   7622 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7623 		break;
   7624 	case NFS4_CHECK_STATEID_OLD:
   7625 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7626 		break;
   7627 	case NFS4_CHECK_STATEID_BAD:
   7628 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
   7629 		break;
   7630 	case NFS4_CHECK_STATEID_EXPIRED:
   7631 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
   7632 		break;
   7633 	case NFS4_CHECK_STATEID_CLOSED:
   7634 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
   7635 		break;
   7636 	case NFS4_CHECK_STATEID_REPLAY:
   7637 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7638 		    resop)) {
   7639 		case NFS4_CHKSEQ_OKAY:
   7640 			/*
   7641 			 * This is replayed stateid; if seqid matches
   7642 			 * next expected, then client is using wrong seqid.
   7643 			 */
   7644 			/* fall through */
   7645 		case NFS4_CHKSEQ_BAD:
   7646 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7647 			break;
   7648 		case NFS4_CHKSEQ_REPLAY:
   7649 			/*
   7650 			 * Note this case is the duplicate case so
   7651 			 * resp->status is already set.
   7652 			 */
   7653 			*cs->statusp = resp->status;
   7654 			rfs4_update_lease(sp->rs_owner->ro_client);
   7655 			break;
   7656 		}
   7657 		break;
   7658 	case NFS4_CHECK_STATEID_UNCONFIRMED:
   7659 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
   7660 		    resop) != NFS4_CHKSEQ_OKAY) {
   7661 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
   7662 			break;
   7663 		}
   7664 		*cs->statusp = resp->status = NFS4_OK;
   7665 
   7666 		next_stateid(&sp->rs_stateid);
   7667 		resp->open_stateid = sp->rs_stateid.stateid;
   7668 		sp->rs_owner->ro_need_confirm = FALSE;
   7669 		rfs4_update_lease(sp->rs_owner->ro_client);
   7670 		rfs4_update_open_sequence(sp->rs_owner);
   7671 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
   7672 		break;
   7673 	default:
   7674 		ASSERT(FALSE);
   7675 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
   7676 		break;
   7677 	}
   7678 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
   7679 	rfs4_state_rele(sp);
   7680 
   7681 out:
   7682 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
   7683 	    OPEN_CONFIRM4res *, resp);
   7684 }
   7685 
   7686 /*ARGSUSED*/
   7687 void
   7688 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
   7689     struct svc_req *req, struct compound_state *cs)
   7690 {
   7691 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
   7692 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
   7693 	uint32_t access = args->share_access;
   7694 	uint32_t deny = args->share_deny;
   7695 	nfsstat4 status;
   7696 	rfs4_state_t *sp;
   7697 	rfs4_file_t *fp;
   7698 	int fflags = 0;
   7699 
   7700 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
   7701