Home | History | Annotate | Download | only in fs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     22 /*	  All Rights Reserved  	*/
     23 
     24 
     25 /*
     26  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     27  * Use is subject to license terms.
     28  */
     29 
     30 /*
     31  * Generic vnode operations.
     32  */
     33 #include <sys/types.h>
     34 #include <sys/param.h>
     35 #include <sys/systm.h>
     36 #include <sys/errno.h>
     37 #include <sys/fcntl.h>
     38 #include <sys/flock.h>
     39 #include <sys/statvfs.h>
     40 #include <sys/vfs.h>
     41 #include <sys/vnode.h>
     42 #include <sys/proc.h>
     43 #include <sys/user.h>
     44 #include <sys/unistd.h>
     45 #include <sys/cred.h>
     46 #include <sys/poll.h>
     47 #include <sys/debug.h>
     48 #include <sys/cmn_err.h>
     49 #include <sys/stream.h>
     50 #include <fs/fs_subr.h>
     51 #include <fs/fs_reparse.h>
     52 #include <sys/door.h>
     53 #include <sys/acl.h>
     54 #include <sys/share.h>
     55 #include <sys/file.h>
     56 #include <sys/kmem.h>
     57 #include <sys/file.h>
     58 #include <sys/nbmlock.h>
     59 #include <acl/acl_common.h>
     60 #include <sys/pathname.h>
     61 
/* Forward declaration; the definition follows fs_frlock() in this file. */
static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);

/*
 * Tunable limiting the number of retries attempted when recovering from
 * a stale-file (STALE) error.  Consulted by fs_need_estale_retry().
 */
int fs_estale_retry = 5;

/*
 * Support for the reparse point door upcall: the cached door handle and
 * the mutex that protects it.
 */
static door_handle_t reparsed_door;
static kmutex_t reparsed_door_lock;
     74 
     75 /*
     76  * The associated operation is not supported by the file system.
     77  */
     78 int
     79 fs_nosys()
     80 {
     81 	return (ENOSYS);
     82 }
     83 
     84 /*
     85  * The associated operation is invalid (on this vnode).
     86  */
     87 int
     88 fs_inval()
     89 {
     90 	return (EINVAL);
     91 }
     92 
     93 /*
     94  * The associated operation is valid only for directories.
     95  */
     96 int
     97 fs_notdir()
     98 {
     99 	return (ENOTDIR);
    100 }
    101 
    102 /*
    103  * Free the file system specific resources. For the file systems that
    104  * do not support the forced unmount, it will be a nop function.
    105  */
    106 
    107 /*ARGSUSED*/
    108 void
    109 fs_freevfs(vfs_t *vfsp)
    110 {
    111 }
    112 
    113 /* ARGSUSED */
    114 int
    115 fs_nosys_map(struct vnode *vp,
    116 	offset_t off,
    117 	struct as *as,
    118 	caddr_t *addrp,
    119 	size_t len,
    120 	uchar_t prot,
    121 	uchar_t maxprot,
    122 	uint_t flags,
    123 	struct cred *cr,
    124 	caller_context_t *ct)
    125 {
    126 	return (ENOSYS);
    127 }
    128 
    129 /* ARGSUSED */
    130 int
    131 fs_nosys_addmap(struct vnode *vp,
    132 	offset_t off,
    133 	struct as *as,
    134 	caddr_t addr,
    135 	size_t len,
    136 	uchar_t prot,
    137 	uchar_t maxprot,
    138 	uint_t flags,
    139 	struct cred *cr,
    140 	caller_context_t *ct)
    141 {
    142 	return (ENOSYS);
    143 }
    144 
    145 /* ARGSUSED */
    146 int
    147 fs_nosys_poll(vnode_t *vp,
    148 	register short events,
    149 	int anyyet,
    150 	register short *reventsp,
    151 	struct pollhead **phpp,
    152 	caller_context_t *ct)
    153 {
    154 	return (ENOSYS);
    155 }
    156 
    157 
    158 /*
    159  * The file system has nothing to sync to disk.  However, the
    160  * VFS_SYNC operation must not fail.
    161  */
    162 /* ARGSUSED */
    163 int
    164 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
    165 {
    166 	return (0);
    167 }
    168 
    169 /*
    170  * Does nothing but VOP_FSYNC must not fail.
    171  */
    172 /* ARGSUSED */
    173 int
    174 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
    175 {
    176 	return (0);
    177 }
    178 
    179 /*
    180  * Does nothing but VOP_PUTPAGE must not fail.
    181  */
    182 /* ARGSUSED */
    183 int
    184 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    185 	caller_context_t *ctp)
    186 {
    187 	return (0);
    188 }
    189 
    190 /*
    191  * Does nothing but VOP_IOCTL must not fail.
    192  */
    193 /* ARGSUSED */
    194 int
    195 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
    196 	int *rvalp)
    197 {
    198 	return (0);
    199 }
    200 
    201 /*
    202  * Read/write lock/unlock.  Does nothing.
    203  */
    204 /* ARGSUSED */
    205 int
    206 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
    207 {
    208 	return (-1);
    209 }
    210 
    211 /* ARGSUSED */
    212 void
    213 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
    214 {
    215 }
    216 
    217 /*
    218  * Compare two vnodes.
    219  */
    220 /*ARGSUSED2*/
    221 int
    222 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
    223 {
    224 	return (vp1 == vp2);
    225 }
    226 
    227 /*
    228  * No-op seek operation.
    229  */
    230 /* ARGSUSED */
    231 int
    232 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
    233 {
    234 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
    235 }
    236 
/*
 * File and record locking.
 *
 * Translates the fcntl-style command in cmd into the flag word (frcmd)
 * passed to reclock() and then calls reclock() to do the work:
 *
 *	F_GETLK, F_O_GETLK	query only (no SETFLCK)
 *	F_SETLK			SETFLCK
 *	F_SETLK_NBMAND		as F_SETLK, plus NBMLCK for read/write locks
 *	F_SETLKW		SETFLCK|SLPFLCK (may sleep)
 *	F_HASREMOTELOCKS	answered here, reclock() is not called
 *
 * In flag, F_REMOTELOCK adds RCMDLCK and F_PXFSLOCK adds PCMDLCK; when
 * neither is set the request is stamped with the current process's pid
 * and a sysid of 0.
 *
 * Returns 0 or an errno value: EINVAL for an unknown cmd or when an NBMAND
 * lock is requested on a vnode that does not allow one, EAGAIN when an
 * NBMAND lock conflicts with an existing mapping of the file, otherwise
 * whatever reclock() returns.
 */
/* ARGSUSED */
int
fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
	offset_t offset, flk_callback_t *flk_cbp, cred_t *cr,
	caller_context_t *ct)
{
	int frcmd;
	int nlmid;
	int error = 0;
	flk_callback_t serialize_callback;
	int serialize = 0;	/* set once nbl_start_crit() has been called */
	v_mode_t mode;

	switch (cmd) {

	case F_GETLK:
	case F_O_GETLK:
		if (flag & F_REMOTELOCK) {
			frcmd = RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = PCMDLCK;
		} else {
			frcmd = 0;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}
		break;

	case F_SETLK_NBMAND:
		/*
		 * Are NBMAND locks allowed on this file?
		 */
		if (!vp->v_vfsp ||
		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
			error = EINVAL;
			goto done;
		}
		if (vp->v_type != VREG) {
			error = EINVAL;
			goto done;
		}
		/*FALLTHROUGH*/

	case F_SETLK:
		if (flag & F_REMOTELOCK) {
			frcmd = SETFLCK|RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = SETFLCK|PCMDLCK;
		} else {
			frcmd = SETFLCK;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}
		/* Only read and write locks become NBMAND locks. */
		if (cmd == F_SETLK_NBMAND &&
		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
			frcmd |= NBMLCK;
		}

		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_WRITER);
			serialize = 1;
			if (frcmd & NBMLCK) {
				/*
				 * Refuse the NBMAND lock if the file is
				 * already mapped in a conflicting mode.
				 */
				mode = (bfp->l_type == F_RDLCK) ?
				    V_READ : V_RDANDWR;
				if (vn_is_mapped(vp, mode)) {
					error = EAGAIN;
					goto done;
				}
			}
		}
		break;

	case F_SETLKW:
		if (flag & F_REMOTELOCK) {
			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
		} else if (flag & F_PXFSLOCK) {
			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
		} else {
			frcmd = SETFLCK|SLPFLCK;
			bfp->l_pid = ttoproc(curthread)->p_pid;
			bfp->l_sysid = 0;
		}

		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_WRITER);
			serialize = 1;
		}
		break;

	case F_HASREMOTELOCKS:
		/* The answer is stored in the flock itself; error stays 0. */
		nlmid = GETNLMID(bfp->l_sysid);
		if (nlmid != 0) {	/* booted as a cluster */
			l_has_rmt(bfp) =
			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
		} else {		/* not booted as a cluster */
			l_has_rmt(bfp) = flk_has_remote_locks(vp);
		}

		goto done;

	default:
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is a blocking lock request and we're serializing lock
	 * requests, modify the callback list to leave the critical region
	 * while we're waiting for the lock.
	 */

	if (serialize && (frcmd & SLPFLCK) != 0) {
		flk_add_callback(&serialize_callback,
		    frlock_serialize_blocked, vp, flk_cbp);
		flk_cbp = &serialize_callback;
	}

	error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);

done:
	/* Leave the critical region on every path that entered it. */
	if (serialize)
		nbl_end_crit(vp);

	return (error);
}
    365 
    366 /*
    367  * Callback when a lock request blocks and we are serializing requests.  If
    368  * before sleeping, leave the critical region.  If after wakeup, reenter
    369  * the critical region.
    370  */
    371 
    372 static callb_cpr_t *
    373 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
    374 {
    375 	vnode_t *vp = (vnode_t *)infop;
    376 
    377 	if (when == FLK_BEFORE_SLEEP)
    378 		nbl_end_crit(vp);
    379 	else {
    380 		nbl_start_crit(vp, RW_WRITER);
    381 	}
    382 
    383 	return (NULL);
    384 }
    385 
    386 /*
    387  * Allow any flags.
    388  */
    389 /* ARGSUSED */
    390 int
    391 fs_setfl(
    392 	vnode_t *vp,
    393 	int oflags,
    394 	int nflags,
    395 	cred_t *cr,
    396 	caller_context_t *ct)
    397 {
    398 	return (0);
    399 }
    400 
    401 /*
    402  * Return the answer requested to poll() for non-device files.
    403  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
    404  */
    405 struct pollhead fs_pollhd;
    406 
    407 /* ARGSUSED */
    408 int
    409 fs_poll(vnode_t *vp,
    410 	register short events,
    411 	int anyyet,
    412 	register short *reventsp,
    413 	struct pollhead **phpp,
    414 	caller_context_t *ct)
    415 {
    416 	*reventsp = 0;
    417 	if (events & POLLIN)
    418 		*reventsp |= POLLIN;
    419 	if (events & POLLRDNORM)
    420 		*reventsp |= POLLRDNORM;
    421 	if (events & POLLRDBAND)
    422 		*reventsp |= POLLRDBAND;
    423 	if (events & POLLOUT)
    424 		*reventsp |= POLLOUT;
    425 	if (events & POLLWRBAND)
    426 		*reventsp |= POLLWRBAND;
    427 	*phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
    428 	return (0);
    429 }
    430 
    431 /*
    432  * POSIX pathconf() support.
    433  */
    434 /* ARGSUSED */
    435 int
    436 fs_pathconf(
    437 	vnode_t *vp,
    438 	int cmd,
    439 	ulong_t *valp,
    440 	cred_t *cr,
    441 	caller_context_t *ct)
    442 {
    443 	register ulong_t val;
    444 	register int error = 0;
    445 	struct statvfs64 vfsbuf;
    446 
    447 	switch (cmd) {
    448 
    449 	case _PC_LINK_MAX:
    450 		val = MAXLINK;
    451 		break;
    452 
    453 	case _PC_MAX_CANON:
    454 		val = MAX_CANON;
    455 		break;
    456 
    457 	case _PC_MAX_INPUT:
    458 		val = MAX_INPUT;
    459 		break;
    460 
    461 	case _PC_NAME_MAX:
    462 		bzero(&vfsbuf, sizeof (vfsbuf));
    463 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
    464 			break;
    465 		val = vfsbuf.f_namemax;
    466 		break;
    467 
    468 	case _PC_PATH_MAX:
    469 	case _PC_SYMLINK_MAX:
    470 		val = MAXPATHLEN;
    471 		break;
    472 
    473 	case _PC_PIPE_BUF:
    474 		val = PIPE_BUF;
    475 		break;
    476 
    477 	case _PC_NO_TRUNC:
    478 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
    479 			val = 1;	/* NOTRUNC is enabled for vp */
    480 		else
    481 			val = (ulong_t)-1;
    482 		break;
    483 
    484 	case _PC_VDISABLE:
    485 		val = _POSIX_VDISABLE;
    486 		break;
    487 
    488 	case _PC_CHOWN_RESTRICTED:
    489 		if (rstchown)
    490 			val = rstchown; /* chown restricted enabled */
    491 		else
    492 			val = (ulong_t)-1;
    493 		break;
    494 
    495 	case _PC_FILESIZEBITS:
    496 
    497 		/*
    498 		 * If ever we come here it means that underlying file system
    499 		 * does not recognise the command and therefore this
    500 		 * configurable limit cannot be determined. We return -1
    501 		 * and don't change errno.
    502 		 */
    503 
    504 		val = (ulong_t)-1;    /* large file support */
    505 		break;
    506 
    507 	case _PC_ACL_ENABLED:
    508 		val = 0;
    509 		break;
    510 
    511 	case _PC_CASE_BEHAVIOR:
    512 		val = _CASE_SENSITIVE;
    513 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
    514 			val |= _CASE_INSENSITIVE;
    515 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
    516 			val &= ~_CASE_SENSITIVE;
    517 		break;
    518 
    519 	case _PC_SATTR_ENABLED:
    520 	case _PC_SATTR_EXISTS:
    521 		val = 0;
    522 		break;
    523 
    524 	case _PC_ACCESS_FILTERING:
    525 		val = 0;
    526 		break;
    527 
    528 	default:
    529 		error = EINVAL;
    530 		break;
    531 	}
    532 
    533 	if (error == 0)
    534 		*valp = val;
    535 	return (error);
    536 }
    537 
    538 /*
    539  * Dispose of a page.
    540  */
    541 /* ARGSUSED */
    542 void
    543 fs_dispose(
    544 	struct vnode *vp,
    545 	page_t *pp,
    546 	int fl,
    547 	int dn,
    548 	struct cred *cr,
    549 	caller_context_t *ct)
    550 {
    551 
    552 	ASSERT(fl == B_FREE || fl == B_INVAL);
    553 
    554 	if (fl == B_FREE)
    555 		page_free(pp, dn);
    556 	else
    557 		page_destroy(pp, dn);
    558 }
    559 
    560 /* ARGSUSED */
    561 void
    562 fs_nodispose(
    563 	struct vnode *vp,
    564 	page_t *pp,
    565 	int fl,
    566 	int dn,
    567 	struct cred *cr,
    568 	caller_context_t *ct)
    569 {
    570 	cmn_err(CE_PANIC, "fs_nodispose invoked");
    571 }
    572 
    573 /*
    574  * fabricate acls for file systems that do not support acls.
    575  */
    576 /* ARGSUSED */
    577 int
    578 fs_fab_acl(
    579 	vnode_t *vp,
    580 	vsecattr_t *vsecattr,
    581 	int flag,
    582 	cred_t *cr,
    583 	caller_context_t *ct)
    584 {
    585 	aclent_t	*aclentp;
    586 	ace_t		*acep;
    587 	struct vattr	vattr;
    588 	int		error;
    589 	size_t		aclsize;
    590 
    591 	vsecattr->vsa_aclcnt	= 0;
    592 	vsecattr->vsa_aclentsz	= 0;
    593 	vsecattr->vsa_aclentp	= NULL;
    594 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
    595 	vsecattr->vsa_dfaclentp	= NULL;
    596 
    597 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
    598 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
    599 		return (error);
    600 
    601 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
    602 		aclsize = 4 * sizeof (aclent_t);
    603 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
    604 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
    605 		aclentp = vsecattr->vsa_aclentp;
    606 
    607 		aclentp->a_type = USER_OBJ;	/* Owner */
    608 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
    609 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
    610 		aclentp++;
    611 
    612 		aclentp->a_type = GROUP_OBJ;    /* Group */
    613 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
    614 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
    615 		aclentp++;
    616 
    617 		aclentp->a_type = OTHER_OBJ;    /* Other */
    618 		aclentp->a_perm = vattr.va_mode & 0007;
    619 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
    620 		aclentp++;
    621 
    622 		aclentp->a_type = CLASS_OBJ;    /* Class */
    623 		aclentp->a_perm = (ushort_t)(0007);
    624 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
    625 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
    626 		aclsize = 6 * sizeof (ace_t);
    627 		vsecattr->vsa_aclcnt	= 6;
    628 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
    629 		vsecattr->vsa_aclentsz = aclsize;
    630 		acep = vsecattr->vsa_aclentp;
    631 		(void) memcpy(acep, trivial_acl, sizeof (ace_t) * 6);
    632 		adjust_ace_pair(acep, (vattr.va_mode & 0700) >> 6);
    633 		adjust_ace_pair(acep + 2, (vattr.va_mode & 0070) >> 3);
    634 		adjust_ace_pair(acep + 4, vattr.va_mode & 0007);
    635 	}
    636 
    637 	return (0);
    638 }
    639 
    640 /*
    641  * Common code for implementing DOS share reservations
    642  */
    643 /* ARGSUSED4 */
    644 int
    645 fs_shrlock(
    646 	struct vnode *vp,
    647 	int cmd,
    648 	struct shrlock *shr,
    649 	int flag,
    650 	cred_t *cr,
    651 	caller_context_t *ct)
    652 {
    653 	int error;
    654 
    655 	/*
    656 	 * Make sure that the file was opened with permissions appropriate
    657 	 * for the request, and make sure the caller isn't trying to sneak
    658 	 * in an NBMAND request.
    659 	 */
    660 	if (cmd == F_SHARE) {
    661 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
    662 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
    663 			return (EBADF);
    664 		if (shr->s_access & (F_RMACC | F_MDACC))
    665 			return (EINVAL);
    666 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
    667 			return (EINVAL);
    668 	}
    669 	if (cmd == F_SHARE_NBMAND) {
    670 		/* make sure nbmand is allowed on the file */
    671 		if (!vp->v_vfsp ||
    672 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
    673 			return (EINVAL);
    674 		}
    675 		if (vp->v_type != VREG) {
    676 			return (EINVAL);
    677 		}
    678 	}
    679 
    680 	nbl_start_crit(vp, RW_WRITER);
    681 
    682 	switch (cmd) {
    683 
    684 	case F_SHARE_NBMAND:
    685 		shr->s_deny |= F_MANDDNY;
    686 		/*FALLTHROUGH*/
    687 	case F_SHARE:
    688 		error = add_share(vp, shr);
    689 		break;
    690 
    691 	case F_UNSHARE:
    692 		error = del_share(vp, shr);
    693 		break;
    694 
    695 	case F_HASREMOTELOCKS:
    696 		/*
    697 		 * We are overloading this command to refer to remote
    698 		 * shares as well as remote locks, despite its name.
    699 		 */
    700 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
    701 		error = 0;
    702 		break;
    703 
    704 	default:
    705 		error = EINVAL;
    706 		break;
    707 	}
    708 
    709 	nbl_end_crit(vp);
    710 	return (error);
    711 }
    712 
    713 /*ARGSUSED1*/
    714 int
    715 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    716     caller_context_t *ct)
    717 {
    718 	ASSERT(vp != NULL);
    719 	return (ENOTSUP);
    720 }
    721 
    722 /*ARGSUSED1*/
    723 int
    724 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
    725     caller_context_t *ct)
    726 {
    727 	ASSERT(vp != NULL);
    728 	return (0);
    729 }
    730 
    731 /*
    732  * return 1 for non-trivial ACL.
    733  *
    734  * NB: It is not necessary for the caller to VOP_RWLOCK since
    735  *	we only issue VOP_GETSECATTR.
    736  *
    737  * Returns 0 == trivial
    738  *         1 == NOT Trivial
    739  *	   <0 could not determine.
    740  */
    741 int
    742 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
    743 {
    744 	ulong_t		acl_styles;
    745 	ulong_t		acl_flavor;
    746 	vsecattr_t 	vsecattr;
    747 	int 		error;
    748 	int		isnontrivial;
    749 
    750 	/* determine the forms of ACLs maintained */
    751 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
    752 
    753 	/* clear bits we don't understand and establish default acl_style */
    754 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
    755 	if (error || (acl_styles == 0))
    756 		acl_styles = _ACL_ACLENT_ENABLED;
    757 
    758 	vsecattr.vsa_aclentp = NULL;
    759 	vsecattr.vsa_dfaclentp = NULL;
    760 	vsecattr.vsa_aclcnt = 0;
    761 	vsecattr.vsa_dfaclcnt = 0;
    762 
    763 	while (acl_styles) {
    764 		/* select one of the styles as current flavor */
    765 		acl_flavor = 0;
    766 		if (acl_styles & _ACL_ACLENT_ENABLED) {
    767 			acl_flavor = _ACL_ACLENT_ENABLED;
    768 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
    769 		} else if (acl_styles & _ACL_ACE_ENABLED) {
    770 			acl_flavor = _ACL_ACE_ENABLED;
    771 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
    772 		}
    773 
    774 		ASSERT(vsecattr.vsa_mask && acl_flavor);
    775 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
    776 		if (error == 0)
    777 			break;
    778 
    779 		/* that flavor failed */
    780 		acl_styles &= ~acl_flavor;
    781 	}
    782 
    783 	/* if all styles fail then assume trivial */
    784 	if (acl_styles == 0)
    785 		return (0);
    786 
    787 	/* process the flavor that worked */
    788 	isnontrivial = 0;
    789 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
    790 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
    791 			isnontrivial = 1;
    792 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
    793 			kmem_free(vsecattr.vsa_aclentp,
    794 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
    795 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
    796 			kmem_free(vsecattr.vsa_dfaclentp,
    797 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
    798 	}
    799 	if (acl_flavor & _ACL_ACE_ENABLED) {
    800 
    801 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
    802 		    vsecattr.vsa_aclcnt);
    803 
    804 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
    805 			kmem_free(vsecattr.vsa_aclentp,
    806 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
    807 		/* ACE has no vsecattr.vsa_dfaclcnt */
    808 	}
    809 	return (isnontrivial);
    810 }
    811 
    812 /*
    813  * Check whether we need a retry to recover from STALE error.
    814  */
    815 int
    816 fs_need_estale_retry(int retry_count)
    817 {
    818 	if (retry_count < fs_estale_retry)
    819 		return (1);
    820 	else
    821 		return (0);
    822 }
    823 
    824 
    825 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
    826 
    827 /*
    828  * Routine for anti-virus scanner to call to register its scanning routine.
    829  */
    830 void
    831 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
    832 {
    833 	fs_av_scan = av_scan;
    834 }
    835 
    836 /*
    837  * Routine for file systems to call to initiate anti-virus scanning.
    838  * Scanning will only be done on REGular files (currently).
    839  */
    840 int
    841 fs_vscan(vnode_t *vp, cred_t *cr, int async)
    842 {
    843 	int ret = 0;
    844 
    845 	if (fs_av_scan && vp->v_type == VREG)
    846 		ret = (*fs_av_scan)(vp, cr, async);
    847 
    848 	return (ret);
    849 }
    850 
    851 /*
    852  * support functions for reparse point
    853  */
    854 /*
    855  * reparse_vnode_parse
    856  *
    857  * Read the symlink data of a reparse point specified by the vnode
    858  * and return the reparse data as name-value pair in the nvlist.
    859  */
    860 int
    861 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
    862 {
    863 	int err;
    864 	char *lkdata;
    865 	struct uio uio;
    866 	struct iovec iov;
    867 
    868 	if (vp == NULL || nvl == NULL)
    869 		return (EINVAL);
    870 
    871 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
    872 
    873 	/*
    874 	 * Set up io vector to read sym link data
    875 	 */
    876 	iov.iov_base = lkdata;
    877 	iov.iov_len = MAXREPARSELEN;
    878 	uio.uio_iov = &iov;
    879 	uio.uio_iovcnt = 1;
    880 	uio.uio_segflg = UIO_SYSSPACE;
    881 	uio.uio_extflg = UIO_COPY_CACHED;
    882 	uio.uio_loffset = (offset_t)0;
    883 	uio.uio_resid = MAXREPARSELEN;
    884 
    885 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
    886 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
    887 		err = reparse_parse(lkdata, nvl);
    888 	}
    889 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
    890 
    891 	return (err);
    892 }
    893 
    894 void
    895 reparse_point_init()
    896 {
    897 	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
    898 }
    899 
    900 static door_handle_t
    901 reparse_door_get_handle()
    902 {
    903 	door_handle_t dh;
    904 
    905 	mutex_enter(&reparsed_door_lock);
    906 	if ((dh = reparsed_door) == NULL) {
    907 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
    908 			reparsed_door = NULL;
    909 			dh = NULL;
    910 		} else
    911 			dh = reparsed_door;
    912 	}
    913 	mutex_exit(&reparsed_door_lock);
    914 	return (dh);
    915 }
    916 
    917 static void
    918 reparse_door_reset_handle()
    919 {
    920 	mutex_enter(&reparsed_door_lock);
    921 	reparsed_door = NULL;
    922 	mutex_exit(&reparsed_door_lock);
    923 }
    924 
/*
 * reparse_kderef
 *
 * Accepts the service-specific item from the reparse point and returns
 * the service-specific data requested.  The caller specifies the size of
 * the buffer provided via *bufsize; the routine will fail with EOVERFLOW
 * if the results will not fit in the buffer, in which case *bufsize will
 * contain the number of bytes needed to hold the results.
 *
 * The request is sent to the reparse daemon as the string
 * "<svc_type>:<svc_data>" through a door upcall.  The upcall is retried,
 * with a one second delay, while it fails with EAGAIN or EINTR, up to
 * REPARSED_DOORCALL_MAX_RETRY attempts.
 *
 * Returns 0 on success, with *bufsize updated to the length of the actual
 * result; otherwise an error code: EINVAL if any argument is NULL, EBADF
 * if no door handle can be obtained, the error from the door upcall, the
 * status reported by the daemon, or EOVERFLOW as described above.
 */
int
reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
    size_t *bufsize)
{
	int err, retries, need_free;
	size_t dlen, res_len;
	char *darg;
	door_arg_t door_args;
	reparsed_door_res_t *resp;
	door_handle_t rp_door;

	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
	    bufsize == NULL)
		return (EINVAL);

	/* get reparsed's door handle */
	if ((rp_door = reparse_door_get_handle()) == NULL)
		return (EBADF);

	/*
	 * setup buffer for door_call args and results; the "+ 2" leaves
	 * room for the ':' separator and the terminating NUL
	 */
	dlen = strlen(svc_type) + strlen(svc_data) + 2;
	if (*bufsize < dlen) {
		darg = kmem_alloc(dlen, KM_SLEEP);
		need_free = 1;
	} else {
		darg = buf;	/* use same buffer for door's args & results */
		need_free = 0;
	}

	/* build argument string of door call */
	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);

	/* setup args for door call */
	door_args.data_ptr = darg;
	door_args.data_size = dlen;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = buf;
	door_args.rsize = *bufsize;

	/* do the door_call */
	retries = 0;
	door_ki_hold(rp_door);
	while ((err = door_ki_upcall_limited(rp_door, &door_args,
	    NULL, SIZE_MAX, 0)) != 0) {
		if (err == EAGAIN || err == EINTR) {
			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
				delay(SEC_TO_TICK(1));
				continue;
			}
		} else if (err == EBADF) {
			/* door server goes away... */
			reparse_door_reset_handle();
		}
		/* out of retries, or an error that is not retried */
		break;
	}
	door_ki_rele(rp_door);
	if (need_free)
		kmem_free(darg, dlen);		/* done with args buffer */

	if (err != 0)
		return (err);

	resp = (reparsed_door_res_t *)door_args.rbuf;
	if ((err = resp->res_status) == 0) {
		/*
		 * have to save the length of the results before the
		 * bcopy below since it can be an overlapping copy that
		 * overwrites the reparsed_door_res_t structure at
		 * the beginning of the buffer.
		 */
		res_len = (size_t)resp->res_len;

		/* deref call is ok */
		if (res_len > *bufsize)
			err = EOVERFLOW;
		else
			bcopy(resp->res_data, buf, res_len);
		/* report the result length, or the size needed on overflow */
		*bufsize = res_len;
	}
	/*
	 * rbuf no longer pointing at buf means the results came back in a
	 * different buffer (presumably one allocated by the door upcall --
	 * TODO confirm); release it.
	 */
	if (door_args.rbuf != buf)
		kmem_free(door_args.rbuf, door_args.rsize);

	return (err);
}
   1022