Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
     27 /* All Rights Reserved */
     28 
     29 #include <sys/param.h>
     30 #include <sys/types.h>
     31 #include <sys/systm.h>
     32 #include <sys/cred.h>
     33 #include <sys/buf.h>
     34 #include <sys/vfs.h>
     35 #include <sys/vnode.h>
     36 #include <sys/uio.h>
     37 #include <sys/errno.h>
     38 #include <sys/sysmacros.h>
     39 #include <sys/statvfs.h>
     40 #include <sys/kmem.h>
     41 #include <sys/dirent.h>
     42 #include <sys/cmn_err.h>
     43 #include <sys/debug.h>
     44 #include <sys/systeminfo.h>
     45 #include <sys/flock.h>
     46 #include <sys/nbmlock.h>
     47 #include <sys/policy.h>
     48 #include <sys/sdt.h>
     49 
     50 #include <rpc/types.h>
     51 #include <rpc/auth.h>
     52 #include <rpc/svc.h>
     53 #include <rpc/rpc_rdma.h>
     54 
     55 #include <nfs/nfs.h>
     56 #include <nfs/export.h>
     57 
     58 #include <sys/strsubr.h>
     59 
     60 #include <sys/tsol/label.h>
     61 #include <sys/tsol/tndb.h>
     62 
     63 #include <inet/ip.h>
     64 #include <inet/ip6.h>
     65 
     66 /*
     67  * These are the interface routines for the server side of the
     68  * Network File System.  See the NFS version 3 protocol specification
     69  * for a description of this interface.
     70  */
     71 
#ifdef DEBUG
/*
 * DEBUG-only switches.  The handlers in this file consult
 * rfs3_do_pre_op_attr / rfs3_do_post_op_attr before pointing their
 * pre-op / post-op attribute pointers at a filled-in vattr; when a
 * switch is zero the pointer stays NULL.
 */
int rfs3_do_pre_op_attr = 1;
int rfs3_do_post_op_attr = 1;
/* NOTE(review): not referenced in the portion of the file reviewed here. */
int rfs3_do_post_op_fh3 = 1;
#endif

/*
 * NOTE(review): presumably the write verifier handed back to clients;
 * its users are not in the portion of the file reviewed here.
 */
static writeverf3 write3verf;

/*
 * Prototypes for file-local helpers used by the handlers
 * (their definitions are not in the portion of the file reviewed here).
 */
static int	sattr3_to_vattr(sattr3 *, struct vattr *);
static int	vattr_to_fattr3(struct vattr *, fattr3 *);
static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int	rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Value the handlers store in caller_context_t.cc_caller_id before
 * issuing VOP calls that take a caller context.
 */
u_longlong_t nfs3_srv_caller_id;
     88 
     89 /* ARGSUSED */
     90 void
     91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
     92 	struct svc_req *req, cred_t *cr)
     93 {
     94 	int error;
     95 	vnode_t *vp;
     96 	struct vattr va;
     97 
     98 	vp = nfs3_fhtovp(&args->object, exi);
     99 
    100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
    101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
    102 
    103 	if (vp == NULL) {
    104 		error = ESTALE;
    105 		goto out;
    106 	}
    107 
    108 	va.va_mask = AT_ALL;
    109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
    110 
    111 	if (!error) {
    112 		/* overflow error if time or size is out of range */
    113 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
    114 		if (error)
    115 			goto out;
    116 		resp->status = NFS3_OK;
    117 
    118 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
    119 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
    120 
    121 		VN_RELE(vp);
    122 
    123 		return;
    124 	}
    125 
    126 out:
    127 	if (curthread->t_flag & T_WOULDBLOCK) {
    128 		curthread->t_flag &= ~T_WOULDBLOCK;
    129 		resp->status = NFS3ERR_JUKEBOX;
    130 	} else
    131 		resp->status = puterrno3(error);
    132 
    133 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
    134 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
    135 
    136 	if (vp != NULL)
    137 		VN_RELE(vp);
    138 }
    139 
    140 void *
    141 rfs3_getattr_getfh(GETATTR3args *args)
    142 {
    143 
    144 	return (&args->object);
    145 }
    146 
    147 void
    148 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
    149 	struct svc_req *req, cred_t *cr)
    150 {
    151 	int error;
    152 	vnode_t *vp;
    153 	struct vattr *bvap;
    154 	struct vattr bva;
    155 	struct vattr *avap;
    156 	struct vattr ava;
    157 	int flag;
    158 	int in_crit = 0;
    159 	struct flock64 bf;
    160 	caller_context_t ct;
    161 
    162 	bvap = NULL;
    163 	avap = NULL;
    164 
    165 	vp = nfs3_fhtovp(&args->object, exi);
    166 
    167 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
    168 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
    169 
    170 	if (vp == NULL) {
    171 		error = ESTALE;
    172 		goto out;
    173 	}
    174 
    175 	error = sattr3_to_vattr(&args->new_attributes, &ava);
    176 	if (error)
    177 		goto out;
    178 
    179 	if (is_system_labeled()) {
    180 		bslabel_t *clabel = req->rq_label;
    181 
    182 		ASSERT(clabel != NULL);
    183 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
    184 		    "got client label from request(1)", struct svc_req *, req);
    185 
    186 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
    187 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
    188 				resp->status = NFS3ERR_ACCES;
    189 				goto out1;
    190 			}
    191 		}
    192 	}
    193 
    194 	/*
    195 	 * We need to specially handle size changes because of
    196 	 * possible conflicting NBMAND locks. Get into critical
    197 	 * region before VOP_GETATTR, so the size attribute is
    198 	 * valid when checking conflicts.
    199 	 *
    200 	 * Also, check to see if the v4 side of the server has
    201 	 * delegated this file.  If so, then we return JUKEBOX to
    202 	 * allow the client to retrasmit its request.
    203 	 */
    204 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
    205 		if (nbl_need_check(vp)) {
    206 			nbl_start_crit(vp, RW_READER);
    207 			in_crit = 1;
    208 		}
    209 	}
    210 
    211 	bva.va_mask = AT_ALL;
    212 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
    213 
    214 	/*
    215 	 * If we can't get the attributes, then we can't do the
    216 	 * right access checking.  So, we'll fail the request.
    217 	 */
    218 	if (error)
    219 		goto out;
    220 
    221 #ifdef DEBUG
    222 	if (rfs3_do_pre_op_attr)
    223 		bvap = &bva;
    224 #else
    225 	bvap = &bva;
    226 #endif
    227 
    228 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
    229 		resp->status = NFS3ERR_ROFS;
    230 		goto out1;
    231 	}
    232 
    233 	if (args->guard.check &&
    234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
    235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
    236 		resp->status = NFS3ERR_NOT_SYNC;
    237 		goto out1;
    238 	}
    239 
    240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
    241 		flag = ATTR_UTIME;
    242 	else
    243 		flag = 0;
    244 
    245 	/*
    246 	 * If the filesystem is exported with nosuid, then mask off
    247 	 * the setuid and setgid bits.
    248 	 */
    249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
    250 	    (exi->exi_export.ex_flags & EX_NOSUID))
    251 		ava.va_mode &= ~(VSUID | VSGID);
    252 
    253 	ct.cc_sysid = 0;
    254 	ct.cc_pid = 0;
    255 	ct.cc_caller_id = nfs3_srv_caller_id;
    256 	ct.cc_flags = CC_DONTBLOCK;
    257 
    258 	/*
    259 	 * We need to specially handle size changes because it is
    260 	 * possible for the client to create a file with modes
    261 	 * which indicate read-only, but with the file opened for
    262 	 * writing.  If the client then tries to set the size of
    263 	 * the file, then the normal access checking done in
    264 	 * VOP_SETATTR would prevent the client from doing so,
    265 	 * although it should be legal for it to do so.  To get
    266 	 * around this, we do the access checking for ourselves
    267 	 * and then use VOP_SPACE which doesn't do the access
    268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
    269 	 * operate on VREG files, let VOP_SETATTR handle the other
    270 	 * extremely rare cases.
    271 	 * Also the client should not be allowed to change the
    272 	 * size of the file if there is a conflicting non-blocking
    273 	 * mandatory lock in the region the change.
    274 	 */
    275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
    276 		if (in_crit) {
    277 			u_offset_t offset;
    278 			ssize_t length;
    279 
    280 			if (ava.va_size < bva.va_size) {
    281 				offset = ava.va_size;
    282 				length = bva.va_size - ava.va_size;
    283 			} else {
    284 				offset = bva.va_size;
    285 				length = ava.va_size - bva.va_size;
    286 			}
    287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
    288 			    NULL)) {
    289 				error = EACCES;
    290 				goto out;
    291 			}
    292 		}
    293 
    294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
    295 			ava.va_mask &= ~AT_SIZE;
    296 			bf.l_type = F_WRLCK;
    297 			bf.l_whence = 0;
    298 			bf.l_start = (off64_t)ava.va_size;
    299 			bf.l_len = 0;
    300 			bf.l_sysid = 0;
    301 			bf.l_pid = 0;
    302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
    303 			    (offset_t)ava.va_size, cr, &ct);
    304 		}
    305 	}
    306 
    307 	if (!error && ava.va_mask)
    308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
    309 
    310 	/* check if a monitor detected a delegation conflict */
    311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
    312 		resp->status = NFS3ERR_JUKEBOX;
    313 		goto out1;
    314 	}
    315 
    316 #ifdef DEBUG
    317 	if (rfs3_do_post_op_attr) {
    318 		ava.va_mask = AT_ALL;
    319 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
    320 	} else
    321 		avap = NULL;
    322 #else
    323 	ava.va_mask = AT_ALL;
    324 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
    325 #endif
    326 
    327 	/*
    328 	 * Force modified metadata out to stable storage.
    329 	 */
    330 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
    331 
    332 	if (error)
    333 		goto out;
    334 
    335 	if (in_crit)
    336 		nbl_end_crit(vp);
    337 
    338 	resp->status = NFS3_OK;
    339 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
    340 
    341 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
    342 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
    343 
    344 	VN_RELE(vp);
    345 
    346 	return;
    347 
    348 out:
    349 	if (curthread->t_flag & T_WOULDBLOCK) {
    350 		curthread->t_flag &= ~T_WOULDBLOCK;
    351 		resp->status = NFS3ERR_JUKEBOX;
    352 	} else
    353 		resp->status = puterrno3(error);
    354 out1:
    355 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
    356 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
    357 
    358 	if (vp != NULL) {
    359 		if (in_crit)
    360 			nbl_end_crit(vp);
    361 		VN_RELE(vp);
    362 	}
    363 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
    364 }
    365 
    366 void *
    367 rfs3_setattr_getfh(SETATTR3args *args)
    368 {
    369 
    370 	return (&args->object);
    371 }
    372 
    373 /* ARGSUSED */
    374 void
    375 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
    376 	struct svc_req *req, cred_t *cr)
    377 {
    378 	int error;
    379 	vnode_t *vp;
    380 	vnode_t *dvp;
    381 	struct vattr *vap;
    382 	struct vattr va;
    383 	struct vattr *dvap;
    384 	struct vattr dva;
    385 	nfs_fh3 *fhp;
    386 	struct sec_ol sec = {0, 0};
    387 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
    388 
    389 	dvap = NULL;
    390 
    391 	/*
    392 	 * Allow lookups from the root - the default
    393 	 * location of the public filehandle.
    394 	 */
    395 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
    396 		dvp = rootdir;
    397 		VN_HOLD(dvp);
    398 
    399 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
    400 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
    401 	} else {
    402 		dvp = nfs3_fhtovp(&args->what.dir, exi);
    403 
    404 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
    405 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
    406 
    407 		if (dvp == NULL) {
    408 			error = ESTALE;
    409 			goto out;
    410 		}
    411 	}
    412 
    413 #ifdef DEBUG
    414 	if (rfs3_do_pre_op_attr) {
    415 		dva.va_mask = AT_ALL;
    416 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
    417 	}
    418 #else
    419 	dva.va_mask = AT_ALL;
    420 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
    421 #endif
    422 
    423 	if (args->what.name == nfs3nametoolong) {
    424 		resp->status = NFS3ERR_NAMETOOLONG;
    425 		goto out1;
    426 	}
    427 
    428 	if (args->what.name == NULL || *(args->what.name) == '\0') {
    429 		resp->status = NFS3ERR_ACCES;
    430 		goto out1;
    431 	}
    432 
    433 	fhp = &args->what.dir;
    434 	if (strcmp(args->what.name, "..") == 0 &&
    435 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
    436 		resp->status = NFS3ERR_NOENT;
    437 		goto out1;
    438 	}
    439 
    440 	/*
    441 	 * If the public filehandle is used then allow
    442 	 * a multi-component lookup
    443 	 */
    444 	if (PUBLIC_FH3(&args->what.dir)) {
    445 		publicfh_flag = TRUE;
    446 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
    447 		    &exi, &sec);
    448 		if (error && exi != NULL)
    449 			exi_rele(exi); /* See comment below Re: publicfh_flag */
    450 		/*
    451 		 * Since WebNFS may bypass MOUNT, we need to ensure this
    452 		 * request didn't come from an unlabeled admin_low client.
    453 		 */
    454 		if (is_system_labeled() && error == 0) {
    455 			struct sockaddr *ca;
    456 			int		addr_type;
    457 			void		*ipaddr;
    458 			tsol_tpc_t	*tp;
    459 
    460 			ca = (struct sockaddr *)svc_getrpccaller(
    461 			    req->rq_xprt)->buf;
    462 			if (ca->sa_family == AF_INET) {
    463 				addr_type = IPV4_VERSION;
    464 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
    465 			} else if (ca->sa_family == AF_INET6) {
    466 				addr_type = IPV6_VERSION;
    467 				ipaddr = &((struct sockaddr_in6 *)
    468 				    ca)->sin6_addr;
    469 			}
    470 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
    471 			if (tp == NULL || tp->tpc_tp.tp_doi !=
    472 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
    473 			    SUN_CIPSO) {
    474 				if (exi != NULL)
    475 					exi_rele(exi);
    476 				VN_RELE(vp);
    477 				resp->status = NFS3ERR_ACCES;
    478 				error = 1;
    479 			}
    480 			if (tp != NULL)
    481 				TPC_RELE(tp);
    482 		}
    483 	} else {
    484 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
    485 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
    486 	}
    487 
    488 	if (is_system_labeled() && error == 0) {
    489 		bslabel_t *clabel = req->rq_label;
    490 
    491 		ASSERT(clabel != NULL);
    492 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
    493 		    "got client label from request(1)", struct svc_req *, req);
    494 
    495 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
    496 			if (!do_rfs_label_check(clabel, dvp,
    497 			    DOMINANCE_CHECK)) {
    498 				if (publicfh_flag && exi != NULL)
    499 					exi_rele(exi);
    500 				VN_RELE(vp);
    501 				resp->status = NFS3ERR_ACCES;
    502 				error = 1;
    503 			}
    504 		}
    505 	}
    506 
    507 #ifdef DEBUG
    508 	if (rfs3_do_post_op_attr) {
    509 		dva.va_mask = AT_ALL;
    510 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
    511 	} else
    512 		dvap = NULL;
    513 #else
    514 	dva.va_mask = AT_ALL;
    515 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
    516 #endif
    517 
    518 	if (error)
    519 		goto out;
    520 
    521 	if (sec.sec_flags & SEC_QUERY) {
    522 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
    523 	} else {
    524 		error = makefh3(&resp->resok.object, vp, exi);
    525 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
    526 			auth_weak = TRUE;
    527 	}
    528 
    529 	if (error) {
    530 		VN_RELE(vp);
    531 		goto out;
    532 	}
    533 
    534 	/*
    535 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
    536 	 * and have obtained a new exportinfo in exi which needs to be
    537 	 * released. Note the the original exportinfo pointed to by exi
    538 	 * will be released by the caller, common_dispatch.
    539 	 */
    540 	if (publicfh_flag)
    541 		exi_rele(exi);
    542 
    543 #ifdef DEBUG
    544 	if (rfs3_do_post_op_attr) {
    545 		va.va_mask = AT_ALL;
    546 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
    547 	} else
    548 		vap = NULL;
    549 #else
    550 	va.va_mask = AT_ALL;
    551 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
    552 #endif
    553 
    554 	VN_RELE(vp);
    555 
    556 	resp->status = NFS3_OK;
    557 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
    558 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
    559 
    560 	/*
    561 	 * If it's public fh, no 0x81, and client's flavor is
    562 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
    563 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
    564 	 */
    565 	if (auth_weak)
    566 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
    567 
    568 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
    569 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
    570 	VN_RELE(dvp);
    571 
    572 	return;
    573 
    574 out:
    575 	if (curthread->t_flag & T_WOULDBLOCK) {
    576 		curthread->t_flag &= ~T_WOULDBLOCK;
    577 		resp->status = NFS3ERR_JUKEBOX;
    578 	} else
    579 		resp->status = puterrno3(error);
    580 out1:
    581 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
    582 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
    583 
    584 	if (dvp != NULL)
    585 		VN_RELE(dvp);
    586 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
    587 
    588 }
    589 
    590 void *
    591 rfs3_lookup_getfh(LOOKUP3args *args)
    592 {
    593 
    594 	return (&args->what.dir);
    595 }
    596 
    597 /* ARGSUSED */
    598 void
    599 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
    600 	struct svc_req *req, cred_t *cr)
    601 {
    602 	int error;
    603 	vnode_t *vp;
    604 	struct vattr *vap;
    605 	struct vattr va;
    606 	int checkwriteperm;
    607 	boolean_t dominant_label = B_FALSE;
    608 	boolean_t equal_label = B_FALSE;
    609 	boolean_t admin_low_client;
    610 
    611 	vap = NULL;
    612 
    613 	vp = nfs3_fhtovp(&args->object, exi);
    614 
    615 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
    616 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
    617 
    618 	if (vp == NULL) {
    619 		error = ESTALE;
    620 		goto out;
    621 	}
    622 
    623 	/*
    624 	 * If the file system is exported read only, it is not appropriate
    625 	 * to check write permissions for regular files and directories.
    626 	 * Special files are interpreted by the client, so the underlying
    627 	 * permissions are sent back to the client for interpretation.
    628 	 */
    629 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
    630 		checkwriteperm = 0;
    631 	else
    632 		checkwriteperm = 1;
    633 
    634 	/*
    635 	 * We need the mode so that we can correctly determine access
    636 	 * permissions relative to a mandatory lock file.  Access to
    637 	 * mandatory lock files is denied on the server, so it might
    638 	 * as well be reflected to the server during the open.
    639 	 */
    640 	va.va_mask = AT_MODE;
    641 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
    642 	if (error)
    643 		goto out;
    644 
    645 #ifdef DEBUG
    646 	if (rfs3_do_post_op_attr)
    647 		vap = &va;
    648 #else
    649 	vap = &va;
    650 #endif
    651 
    652 	resp->resok.access = 0;
    653 
    654 	if (is_system_labeled()) {
    655 		bslabel_t *clabel = req->rq_label;
    656 
    657 		ASSERT(clabel != NULL);
    658 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
    659 		    "got client label from request(1)", struct svc_req *, req);
    660 
    661 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
    662 			if ((equal_label = do_rfs_label_check(clabel, vp,
    663 			    EQUALITY_CHECK)) == B_FALSE) {
    664 				dominant_label = do_rfs_label_check(clabel,
    665 				    vp, DOMINANCE_CHECK);
    666 			} else
    667 				dominant_label = B_TRUE;
    668 			admin_low_client = B_FALSE;
    669 		} else
    670 			admin_low_client = B_TRUE;
    671 	}
    672 
    673 	if (args->access & ACCESS3_READ) {
    674 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
    675 		if (error) {
    676 			if (curthread->t_flag & T_WOULDBLOCK)
    677 				goto out;
    678 		} else if (!MANDLOCK(vp, va.va_mode) &&
    679 		    (!is_system_labeled() || admin_low_client ||
    680 		    dominant_label))
    681 			resp->resok.access |= ACCESS3_READ;
    682 	}
    683 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
    684 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
    685 		if (error) {
    686 			if (curthread->t_flag & T_WOULDBLOCK)
    687 				goto out;
    688 		} else if (!is_system_labeled() || admin_low_client ||
    689 		    dominant_label)
    690 			resp->resok.access |= ACCESS3_LOOKUP;
    691 	}
    692 	if (checkwriteperm &&
    693 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
    694 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
    695 		if (error) {
    696 			if (curthread->t_flag & T_WOULDBLOCK)
    697 				goto out;
    698 		} else if (!MANDLOCK(vp, va.va_mode) &&
    699 		    (!is_system_labeled() || admin_low_client || equal_label)) {
    700 			resp->resok.access |=
    701 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
    702 		}
    703 	}
    704 	if (checkwriteperm &&
    705 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
    706 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
    707 		if (error) {
    708 			if (curthread->t_flag & T_WOULDBLOCK)
    709 				goto out;
    710 		} else if (!is_system_labeled() || admin_low_client ||
    711 		    equal_label)
    712 			resp->resok.access |= ACCESS3_DELETE;
    713 	}
    714 	if (args->access & ACCESS3_EXECUTE) {
    715 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
    716 		if (error) {
    717 			if (curthread->t_flag & T_WOULDBLOCK)
    718 				goto out;
    719 		} else if (!MANDLOCK(vp, va.va_mode) &&
    720 		    (!is_system_labeled() || admin_low_client ||
    721 		    dominant_label))
    722 			resp->resok.access |= ACCESS3_EXECUTE;
    723 	}
    724 
    725 #ifdef DEBUG
    726 	if (rfs3_do_post_op_attr) {
    727 		va.va_mask = AT_ALL;
    728 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
    729 	} else
    730 		vap = NULL;
    731 #else
    732 	va.va_mask = AT_ALL;
    733 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
    734 #endif
    735 
    736 	resp->status = NFS3_OK;
    737 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
    738 
    739 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
    740 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
    741 
    742 	VN_RELE(vp);
    743 
    744 	return;
    745 
    746 out:
    747 	if (curthread->t_flag & T_WOULDBLOCK) {
    748 		curthread->t_flag &= ~T_WOULDBLOCK;
    749 		resp->status = NFS3ERR_JUKEBOX;
    750 	} else
    751 		resp->status = puterrno3(error);
    752 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
    753 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
    754 	if (vp != NULL)
    755 		VN_RELE(vp);
    756 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
    757 }
    758 
    759 void *
    760 rfs3_access_getfh(ACCESS3args *args)
    761 {
    762 
    763 	return (&args->object);
    764 }
    765 
    766 /* ARGSUSED */
    767 void
    768 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
    769 	struct svc_req *req, cred_t *cr)
    770 {
    771 	int error;
    772 	vnode_t *vp;
    773 	struct vattr *vap;
    774 	struct vattr va;
    775 	struct iovec iov;
    776 	struct uio uio;
    777 	char *data;
    778 
    779 	vap = NULL;
    780 
    781 	vp = nfs3_fhtovp(&args->symlink, exi);
    782 
    783 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
    784 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
    785 
    786 	if (vp == NULL) {
    787 		error = ESTALE;
    788 		goto out;
    789 	}
    790 
    791 	va.va_mask = AT_ALL;
    792 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
    793 	if (error)
    794 		goto out;
    795 
    796 #ifdef DEBUG
    797 	if (rfs3_do_post_op_attr)
    798 		vap = &va;
    799 #else
    800 	vap = &va;
    801 #endif
    802 
    803 	if (vp->v_type != VLNK) {
    804 		resp->status = NFS3ERR_INVAL;
    805 		goto out1;
    806 	}
    807 
    808 	if (MANDLOCK(vp, va.va_mode)) {
    809 		resp->status = NFS3ERR_ACCES;
    810 		goto out1;
    811 	}
    812 
    813 	if (is_system_labeled()) {
    814 		bslabel_t *clabel = req->rq_label;
    815 
    816 		ASSERT(clabel != NULL);
    817 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
    818 		    "got client label from request(1)", struct svc_req *, req);
    819 
    820 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
    821 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
    822 				resp->status = NFS3ERR_ACCES;
    823 				goto out1;
    824 			}
    825 		}
    826 	}
    827 
    828 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
    829 
    830 	iov.iov_base = data;
    831 	iov.iov_len = MAXPATHLEN;
    832 	uio.uio_iov = &iov;
    833 	uio.uio_iovcnt = 1;
    834 	uio.uio_segflg = UIO_SYSSPACE;
    835 	uio.uio_extflg = UIO_COPY_CACHED;
    836 	uio.uio_loffset = 0;
    837 	uio.uio_resid = MAXPATHLEN;
    838 
    839 	error = VOP_READLINK(vp, &uio, cr, NULL);
    840 
    841 #ifdef DEBUG
    842 	if (rfs3_do_post_op_attr) {
    843 		va.va_mask = AT_ALL;
    844 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
    845 	} else
    846 		vap = NULL;
    847 #else
    848 	va.va_mask = AT_ALL;
    849 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
    850 #endif
    851 
    852 #if 0 /* notyet */
    853 	/*
    854 	 * Don't do this.  It causes local disk writes when just
    855 	 * reading the file and the overhead is deemed larger
    856 	 * than the benefit.
    857 	 */
    858 	/*
    859 	 * Force modified metadata out to stable storage.
    860 	 */
    861 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
    862 #endif
    863 
    864 	if (error) {
    865 		kmem_free(data, MAXPATHLEN + 1);
    866 		goto out;
    867 	}
    868 
    869 	resp->status = NFS3_OK;
    870 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
    871 	resp->resok.data = data;
    872 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
    873 
    874 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
    875 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
    876 	VN_RELE(vp);
    877 
    878 	return;
    879 
    880 out:
    881 	if (curthread->t_flag & T_WOULDBLOCK) {
    882 		curthread->t_flag &= ~T_WOULDBLOCK;
    883 		resp->status = NFS3ERR_JUKEBOX;
    884 	} else
    885 		resp->status = puterrno3(error);
    886 out1:
    887 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
    888 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
    889 	if (vp != NULL)
    890 		VN_RELE(vp);
    891 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
    892 }
    893 
    894 void *
    895 rfs3_readlink_getfh(READLINK3args *args)
    896 {
    897 
    898 	return (&args->symlink);
    899 }
    900 
    901 void
    902 rfs3_readlink_free(READLINK3res *resp)
    903 {
    904 
    905 	if (resp->status == NFS3_OK)
    906 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
    907 }
    908 
    909 /*
    910  * Server routine to handle read
    911  * May handle RDMA data as well as mblks
    912  */
    913 /* ARGSUSED */
    914 void
    915 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
    916 	struct svc_req *req, cred_t *cr)
    917 {
    918 	int error;
    919 	vnode_t *vp;
    920 	struct vattr *vap;
    921 	struct vattr va;
    922 	struct iovec iov;
    923 	struct uio uio;
    924 	u_offset_t offset;
    925 	mblk_t *mp;
    926 	int alloc_err = 0;
    927 	int in_crit = 0;
    928 	int need_rwunlock = 0;
    929 	caller_context_t ct;
    930 
    931 	vap = NULL;
    932 
    933 	vp = nfs3_fhtovp(&args->file, exi);
    934 
    935 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
    936 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
    937 
    938 	if (vp == NULL) {
    939 		error = ESTALE;
    940 		goto out;
    941 	}
    942 
    943 	if (is_system_labeled()) {
    944 		bslabel_t *clabel = req->rq_label;
    945 
    946 		ASSERT(clabel != NULL);
    947 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
    948 		    "got client label from request(1)", struct svc_req *, req);
    949 
    950 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
    951 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
    952 				resp->status = NFS3ERR_ACCES;
    953 				goto out1;
    954 			}
    955 		}
    956 	}
    957 
    958 	ct.cc_sysid = 0;
    959 	ct.cc_pid = 0;
    960 	ct.cc_caller_id = nfs3_srv_caller_id;
    961 	ct.cc_flags = CC_DONTBLOCK;
    962 
    963 	/*
    964 	 * Enter the critical region before calling VOP_RWLOCK
    965 	 * to avoid a deadlock with write requests.
    966 	 */
    967 	if (nbl_need_check(vp)) {
    968 		nbl_start_crit(vp, RW_READER);
    969 		in_crit = 1;
    970 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
    971 		    NULL)) {
    972 			error = EACCES;
    973 			goto out;
    974 		}
    975 	}
    976 
    977 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
    978 
    979 	/* check if a monitor detected a delegation conflict */
    980 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
    981 		resp->status = NFS3ERR_JUKEBOX;
    982 		goto out1;
    983 	}
    984 
    985 	need_rwunlock = 1;
    986 
    987 	va.va_mask = AT_ALL;
    988 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
    989 
    990 	/*
    991 	 * If we can't get the attributes, then we can't do the
    992 	 * right access checking.  So, we'll fail the request.
    993 	 */
    994 	if (error)
    995 		goto out;
    996 
    997 #ifdef DEBUG
    998 	if (rfs3_do_post_op_attr)
    999 		vap = &va;
   1000 #else
   1001 	vap = &va;
   1002 #endif
   1003 
   1004 	if (vp->v_type != VREG) {
   1005 		resp->status = NFS3ERR_INVAL;
   1006 		goto out1;
   1007 	}
   100