Home | History | Annotate | Download | only in fs
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1488       rsb  * Common Development and Distribution License (the "License").
      6   1488       rsb  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*
     22  10793       dai  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0    stevel  * Use is subject to license terms.
     24      0    stevel  */
     25      0    stevel 
     26      0    stevel /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27      0    stevel /*	  All Rights Reserved  	*/
     28      0    stevel 
     29      0    stevel /*
     30      0    stevel  * University Copyright- Copyright (c) 1982, 1986, 1988
     31      0    stevel  * The Regents of the University of California
     32      0    stevel  * All Rights Reserved
     33      0    stevel  *
     34      0    stevel  * University Acknowledgment- Portions of this document are derived from
     35      0    stevel  * software developed by the University of California, Berkeley, and its
     36      0    stevel  * contributors.
     37      0    stevel  */
     38      0    stevel 
     39      0    stevel #include <sys/types.h>
     40      0    stevel #include <sys/t_lock.h>
     41      0    stevel #include <sys/param.h>
     42      0    stevel #include <sys/errno.h>
     43      0    stevel #include <sys/user.h>
     44      0    stevel #include <sys/fstyp.h>
     45      0    stevel #include <sys/kmem.h>
     46      0    stevel #include <sys/systm.h>
     47      0    stevel #include <sys/proc.h>
     48      0    stevel #include <sys/mount.h>
     49      0    stevel #include <sys/vfs.h>
     50   3898       rsb #include <sys/vfs_opreg.h>
     51      0    stevel #include <sys/fem.h>
     52      0    stevel #include <sys/mntent.h>
     53      0    stevel #include <sys/stat.h>
     54      0    stevel #include <sys/statvfs.h>
     55      0    stevel #include <sys/statfs.h>
     56      0    stevel #include <sys/cred.h>
     57      0    stevel #include <sys/vnode.h>
     58      0    stevel #include <sys/rwstlock.h>
     59      0    stevel #include <sys/dnlc.h>
     60      0    stevel #include <sys/file.h>
     61      0    stevel #include <sys/time.h>
     62      0    stevel #include <sys/atomic.h>
     63      0    stevel #include <sys/cmn_err.h>
     64      0    stevel #include <sys/buf.h>
     65      0    stevel #include <sys/swap.h>
     66      0    stevel #include <sys/debug.h>
     67      0    stevel #include <sys/vnode.h>
     68      0    stevel #include <sys/modctl.h>
     69      0    stevel #include <sys/ddi.h>
     70      0    stevel #include <sys/pathname.h>
     71      0    stevel #include <sys/bootconf.h>
     72      0    stevel #include <sys/dumphdr.h>
     73      0    stevel #include <sys/dc_ki.h>
     74      0    stevel #include <sys/poll.h>
     75      0    stevel #include <sys/sunddi.h>
     76      0    stevel #include <sys/sysmacros.h>
     77      0    stevel #include <sys/zone.h>
     78      0    stevel #include <sys/policy.h>
     79      0    stevel #include <sys/ctfs.h>
     80      0    stevel #include <sys/objfs.h>
     81      0    stevel #include <sys/console.h>
     82      0    stevel #include <sys/reboot.h>
     83   5331       amw #include <sys/attr.h>
     84  10922      Jeff #include <sys/zio.h>
     85   6423   gw25295 #include <sys/spa.h>
     86   6734   johnlev #include <sys/lofi.h>
     87   8194      Jack #include <sys/bootprops.h>
     88      0    stevel 
     89      0    stevel #include <vm/page.h>
     90      0    stevel 
     91      0    stevel #include <fs/fs_subr.h>
     92   1520       rsb /* Private interfaces to create vopstats-related data structures */
     93   1520       rsb extern void		initialize_vopstats(vopstats_t *);
     94   1520       rsb extern vopstats_t	*get_fstype_vopstats(struct vfs *, struct vfssw *);
     95   1520       rsb extern vsk_anchor_t	*get_vskstat_anchor(struct vfs *);
     96   1520       rsb 
     97      0    stevel static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
     98      0    stevel static void vfs_setmntopt_nolock(mntopts_t *, const char *,
     99      0    stevel     const char *, int, int);
    100      0    stevel static int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
    101      0    stevel static void vfs_freemnttab(struct vfs *);
    102      0    stevel static void vfs_freeopt(mntopt_t *);
    103      0    stevel static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
    104      0    stevel static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
    105      0    stevel static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
    106      0    stevel static void vfs_createopttbl_extend(mntopts_t *, const char *,
    107      0    stevel     const mntopts_t *);
    108      0    stevel static char **vfs_copycancelopt_extend(char **const, int);
    109      0    stevel static void vfs_freecancelopt(char **);
    110   5084   johnlev static void getrootfs(char **, char **);
    111      0    stevel static int getmacpath(dev_info_t *, void *);
    112   4863     praks static void vfs_mnttabvp_setup(void);
    113      0    stevel 
/*
 * Singly linked list node that pairs a device number with a vfs.
 * NOTE(review): "ipmnt" presumably stands for "in-progress mount"; the
 * code that manipulates this list is outside this view -- confirm.
 */
struct ipmnt {
	struct ipmnt	*mip_next;	/* next node on the list */
	dev_t		mip_dev;	/* device number for this entry */
	struct vfs	*mip_vfsp;	/* vfs recorded for mip_dev */
};

/*
 * NOTE(review): presumably vfs_miplist / vfs_miplist_end are the head and
 * tail of the ipmnt list and vfs_miplist_mutex guards both -- verify
 * against the list routines.  Both pointers start out NULL.
 */
static kmutex_t		vfs_miplist_mutex;
static struct ipmnt	*vfs_miplist = NULL;
static struct ipmnt	*vfs_miplist_end = NULL;
    123      0    stevel 
static kmem_cache_t *vfs_cache;	/* Pointer to VFS kmem cache */

/*
 * VFS global data.
 */
vnode_t *rootdir;		/* pointer to root inode vnode. */
vnode_t *devicesdir;		/* pointer to inode of devices root */
vnode_t	*devdir;		/* pointer to inode of dev root */

char *server_rootpath;		/* root path for diskless clients */
char *server_hostname;		/* hostname of diskless server */

/*
 * Statically allocated vfs structures.  "root" is the initial target of
 * rootvfs below.
 * NOTE(review): "devices" and "dev" presumably back the /devices and /dev
 * mounts -- confirm against the mount routines.
 */
static struct vfs root;
static struct vfs devices;
static struct vfs dev;
struct vfs *rootvfs = &root;	/* pointer to root vfs; head of VFS list. */
rvfs_t *rvfs_list;		/* array of vfs ptrs for vfs hash list */
int vfshsz = 512;		/* # of heads/locks in vfs hash arrays */
				/* must be power of 2!	*/
timespec_t vfs_mnttab_ctime;	/* mnttab created time */
timespec_t vfs_mnttab_mtime;	/* mnttab last modified time */
/* A string whose first byte is NUL, i.e. an empty file system type name. */
char *vfs_dummyfstype = "\0";
struct pollhead vfs_pollhd;	/* for mnttab pollers */
struct vnode *vfs_mntdummyvp;	/* to fake mnttab read/write for file events */
int	mntfstype;		/* will be set once mnt fs is mounted */
    149      0    stevel 
    150      0    stevel /*
    151      0    stevel  * Table for generic options recognized in the VFS layer and acted
    152      0    stevel  * on at this level before parsing file system specific options.
    153      0    stevel  * The nosuid option is stronger than any of the devices and setuid
    154      0    stevel  * options, so those are canceled when nosuid is seen.
    155      0    stevel  *
    156      0    stevel  * All options which are added here need to be added to the
    157      0    stevel  * list of standard options in usr/src/cmd/fs.d/fslib.c as well.
    158      0    stevel  */
    159      0    stevel /*
    160      0    stevel  * VFS Mount options table
    161      0    stevel  */
/*
 * Cancel lists: each array is a NULL-terminated list of option names and
 * is referenced from the "cancel options" column of the matching entry
 * in mntopts[] below.
 */
static char *ro_cancel[] = { MNTOPT_RW, NULL };
static char *rw_cancel[] = { MNTOPT_RO, NULL };
static char *suid_cancel[] = { MNTOPT_NOSUID, NULL };
static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES,
    MNTOPT_NOSETUID, MNTOPT_SETUID, NULL };
static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL };
static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL };
static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL };
static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL };
static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL };
static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL };
static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL };
static char *noexec_cancel[] = { MNTOPT_EXEC, NULL };

/*
 * One entry per generic option.  Only MNTOPT_REMOUNT has no cancel list
 * and carries a flag (MO_NODISPLAY); every other entry has flags of 0.
 * NOTE(review): the trailing (void *)0 initializes a fifth mntopt_t member
 * that the column header below does not name -- see mntopt_t in vfs.h.
 */
static const mntopt_t mntopts[] = {
/*
 *	option name		cancel options		default arg	flags
 */
	{ MNTOPT_REMOUNT,	NULL,			NULL,
		MO_NODISPLAY, (void *)0 },
	{ MNTOPT_RO,		ro_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_RW,		rw_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_SUID,		suid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOSUID,	nosuid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_DEVICES,	devices_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NODEVICES,	nodevices_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_SETUID,	setuid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOSETUID,	nosetuid_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_NBMAND,	nbmand_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NONBMAND,	nonbmand_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_EXEC,		exec_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOEXEC,	noexec_cancel,		NULL,		0,
		(void *)0 },
};

/*
 * Exported descriptor for the table above: the entry count (computed from
 * the array size) followed by a pointer to the first entry.  The cast
 * drops the const qualifier of mntopts[].
 */
const mntopts_t vfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	(mntopt_t *)&mntopts[0]
};
    212      0    stevel 
    213      0    stevel /*
    214      0    stevel  * File system operation dispatch functions.
    215      0    stevel  */
    216      0    stevel 
    217      0    stevel int
    218      0    stevel fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
    219      0    stevel {
    220      0    stevel 	return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
    221      0    stevel }
    222      0    stevel 
    223      0    stevel int
    224      0    stevel fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
    225      0    stevel {
    226      0    stevel 	return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
    227      0    stevel }
    228      0    stevel 
    229      0    stevel int
    230      0    stevel fsop_root(vfs_t *vfsp, vnode_t **vpp)
    231      0    stevel {
    232      0    stevel 	refstr_t *mntpt;
    233      0    stevel 	int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
    234      0    stevel 	/*
    235      0    stevel 	 * Make sure this root has a path.  With lofs, it is possible to have
    236      0    stevel 	 * a NULL mountpoint.
    237      0    stevel 	 */
    238    254  eschrock 	if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
    239      0    stevel 		mntpt = vfs_getmntpoint(vfsp);
    240      0    stevel 		vn_setpath_str(*vpp, refstr_value(mntpt),
    241      0    stevel 		    strlen(refstr_value(mntpt)));
    242      0    stevel 		refstr_rele(mntpt);
    243      0    stevel 	}
    244      0    stevel 
    245      0    stevel 	return (ret);
    246      0    stevel }
    247      0    stevel 
    248      0    stevel int
    249      0    stevel fsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
    250      0    stevel {
    251      0    stevel 	return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp);
    252      0    stevel }
    253      0    stevel 
    254      0    stevel int
    255      0    stevel fsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
    256      0    stevel {
    257      0    stevel 	return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
    258      0    stevel }
    259      0    stevel 
    260      0    stevel int
    261      0    stevel fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
    262      0    stevel {
    263   5331       amw 	/*
    264   5331       amw 	 * In order to handle system attribute fids in a manner
    265   5331       amw 	 * transparent to the underlying fs, we embed the fid for
    266   5331       amw 	 * the sysattr parent object in the sysattr fid and tack on
    267   5331       amw 	 * some extra bytes that only the sysattr layer knows about.
    268   5331       amw 	 *
    269   5331       amw 	 * This guarantees that sysattr fids are larger than other fids
    270   7757    Janice 	 * for this vfs. If the vfs supports the sysattr view interface
    271   7757    Janice 	 * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size
    272   7757    Janice 	 * collision with XATTR_FIDSZ.
    273   7757    Janice 	 */
    274   7757    Janice 	if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) &&
    275   5331       amw 	    fidp->fid_len == XATTR_FIDSZ)
    276   5331       amw 		return (xattr_dir_vget(vfsp, vpp, fidp));
    277   5331       amw 
    278      0    stevel 	return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
    279      0    stevel }
    280      0    stevel 
    281      0    stevel int
    282      0    stevel fsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
    283      0    stevel {
    284      0    stevel 	return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
    285      0    stevel }
    286      0    stevel 
    287      0    stevel void
    288      0    stevel fsop_freefs(vfs_t *vfsp)
    289      0    stevel {
    290      0    stevel 	(*(vfsp)->vfs_op->vfs_freevfs)(vfsp);
    291      0    stevel }
    292      0    stevel 
    293      0    stevel int
    294      0    stevel fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
    295      0    stevel {
    296      0    stevel 	return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
    297      0    stevel }
    298      0    stevel 
    299      0    stevel int
    300      0    stevel fsop_sync_by_kind(int fstype, short flag, cred_t *cr)
    301      0    stevel {
    302      0    stevel 	ASSERT((fstype >= 0) && (fstype < nfstype));
    303      0    stevel 
    304      0    stevel 	if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype]))
    305      0    stevel 		return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr);
    306      0    stevel 	else
    307      0    stevel 		return (ENOTSUP);
    308      0    stevel }
    309      0    stevel 
    310      0    stevel /*
    311      0    stevel  * File system initialization.  vfs_setfsops() must be called from a file
    312      0    stevel  * system's init routine.
    313      0    stevel  */
    314      0    stevel 
    315      0    stevel static int
    316      0    stevel fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
    317      0    stevel     int *unused_ops)
    318      0    stevel {
    319      0    stevel 	static const fs_operation_trans_def_t vfs_ops_table[] = {
    320      0    stevel 		VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount),
    321      0    stevel 			fs_nosys, fs_nosys,
    322      0    stevel 
    323      0    stevel 		VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount),
    324      0    stevel 			fs_nosys, fs_nosys,
    325      0    stevel 
    326      0    stevel 		VFSNAME_ROOT, offsetof(vfsops_t, vfs_root),
    327      0    stevel 			fs_nosys, fs_nosys,
    328      0    stevel 
    329      0    stevel 		VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs),
    330      0    stevel 			fs_nosys, fs_nosys,
    331      0    stevel 
    332      0    stevel 		VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync),
    333      0    stevel 			(fs_generic_func_p) fs_sync,
    334      0    stevel 			(fs_generic_func_p) fs_sync,	/* No errors allowed */
    335      0    stevel 
    336      0    stevel 		VFSNAME_VGET, offsetof(vfsops_t, vfs_vget),
    337      0    stevel 			fs_nosys, fs_nosys,
    338      0    stevel 
    339      0    stevel 		VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot),
    340      0    stevel 			fs_nosys, fs_nosys,
    341      0    stevel 
    342      0    stevel 		VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs),
    343      0    stevel 			(fs_generic_func_p)fs_freevfs,
    344      0    stevel 			(fs_generic_func_p)fs_freevfs,	/* Shouldn't fail */
    345      0    stevel 
    346      0    stevel 		VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate),
    347      0    stevel 			(fs_generic_func_p)fs_nosys,
    348      0    stevel 			(fs_generic_func_p)fs_nosys,
    349      0    stevel 
    350      0    stevel 		NULL, 0, NULL, NULL
    351      0    stevel 	};
    352      0    stevel 
    353      0    stevel 	return (fs_build_vector(actual, unused_ops, vfs_ops_table, template));
    354      0    stevel }
    355      0    stevel 
    356   6423   gw25295 void
    357   6423   gw25295 zfs_boot_init() {
    358   6423   gw25295 
    359   6423   gw25295 	if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0)
    360   6423   gw25295 		spa_boot_init();
    361   6423   gw25295 }
    362   6423   gw25295 
    363      0    stevel int
    364      0    stevel vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual)
    365      0    stevel {
    366      0    stevel 	int error;
    367      0    stevel 	int unused_ops;
    368      0    stevel 
    369   3904       rsb 	/*
    370   3904       rsb 	 * Verify that fstype refers to a valid fs.  Note that
    371   3904       rsb 	 * 0 is valid since it's used to set "stray" ops.
    372   3904       rsb 	 */
    373   3904       rsb 	if ((fstype < 0) || (fstype >= nfstype))
    374      0    stevel 		return (EINVAL);
    375      0    stevel 
    376      0    stevel 	if (!ALLOCATED_VFSSW(&vfssw[fstype]))
    377      0    stevel 		return (EINVAL);
    378      0    stevel 
    379      0    stevel 	/* Set up the operations vector. */
    380      0    stevel 
    381      0    stevel 	error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops);
    382      0    stevel 
    383      0    stevel 	if (error != 0)
    384      0    stevel 		return (error);
    385      0    stevel 
    386      0    stevel 	vfssw[fstype].vsw_flag |= VSW_INSTALLED;
    387      0    stevel 
    388      0    stevel 	if (actual != NULL)
    389      0    stevel 		*actual = &vfssw[fstype].vsw_vfsops;
    390      0    stevel 
    391      0    stevel #if DEBUG
    392      0    stevel 	if (unused_ops != 0)
    393      0    stevel 		cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied "
    394      0    stevel 		    "but not used", vfssw[fstype].vsw_name, unused_ops);
    395      0    stevel #endif
    396      0    stevel 
    397      0    stevel 	return (0);
    398      0    stevel }
    399      0    stevel 
    400      0    stevel int
    401      0    stevel vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual)
    402      0    stevel {
    403      0    stevel 	int error;
    404      0    stevel 	int unused_ops;
    405      0    stevel 
    406      0    stevel 	*actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP);
    407      0    stevel 
    408      0    stevel 	error = fs_copyfsops(template, *actual, &unused_ops);
    409      0    stevel 	if (error != 0) {
    410      0    stevel 		kmem_free(*actual, sizeof (vfsops_t));
    411      0    stevel 		*actual = NULL;
    412      0    stevel 		return (error);
    413      0    stevel 	}
    414      0    stevel 
    415      0    stevel 	return (0);
    416      0    stevel }
    417      0    stevel 
    418      0    stevel /*
    419      0    stevel  * Free a vfsops structure created as a result of vfs_makefsops().
    420      0    stevel  * NOTE: For a vfsops structure initialized by vfs_setfsops(), use
    421      0    stevel  * vfs_freevfsops_by_type().
    422      0    stevel  */
    423      0    stevel void
    424      0    stevel vfs_freevfsops(vfsops_t *vfsops)
    425      0    stevel {
    426      0    stevel 	kmem_free(vfsops, sizeof (vfsops_t));
    427      0    stevel }
    428      0    stevel 
    429      0    stevel /*
    430      0    stevel  * Since the vfsops structure is part of the vfssw table and wasn't
    431      0    stevel  * really allocated, we're not really freeing anything.  We keep
    432      0    stevel  * the name for consistency with vfs_freevfsops().  We do, however,
    433      0    stevel  * need to take care of a little bookkeeping.
    434      0    stevel  * NOTE: For a vfsops structure created by vfs_setfsops(), use
    435      0    stevel  * vfs_freevfsops_by_type().
    436      0    stevel  */
    437      0    stevel int
    438      0    stevel vfs_freevfsops_by_type(int fstype)
    439      0    stevel {
    440      0    stevel 
    441      0    stevel 	/* Verify that fstype refers to a loaded fs (and not fsid 0). */
    442      0    stevel 	if ((fstype <= 0) || (fstype >= nfstype))
    443      0    stevel 		return (EINVAL);
    444      0    stevel 
    445      0    stevel 	WLOCK_VFSSW();
    446      0    stevel 	if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) {
    447      0    stevel 		WUNLOCK_VFSSW();
    448      0    stevel 		return (EINVAL);
    449      0    stevel 	}
    450      0    stevel 
    451      0    stevel 	vfssw[fstype].vsw_flag &= ~VSW_INSTALLED;
    452      0    stevel 	WUNLOCK_VFSSW();
    453      0    stevel 
    454      0    stevel 	return (0);
    455      0    stevel }
    456      0    stevel 
    457      0    stevel /* Support routines used to reference vfs_op */
    458      0    stevel 
    459      0    stevel /* Set the operations vector for a vfs */
    460      0    stevel void
    461      0    stevel vfs_setops(vfs_t *vfsp, vfsops_t *vfsops)
    462      0    stevel {
    463      0    stevel 	vfsops_t	*op;
    464      0    stevel 
    465      0    stevel 	ASSERT(vfsp != NULL);
    466      0    stevel 	ASSERT(vfsops != NULL);
    467      0    stevel 
    468      0    stevel 	op = vfsp->vfs_op;
    469      0    stevel 	membar_consumer();
    470   5331       amw 	if (vfsp->vfs_femhead == NULL &&
    471      0    stevel 	    casptr(&vfsp->vfs_op, op, vfsops) == op) {
    472      0    stevel 		return;
    473      0    stevel 	}
    474      0    stevel 	fsem_setvfsops(vfsp, vfsops);
    475      0    stevel }
    476      0    stevel 
    477      0    stevel /* Retrieve the operations vector for a vfs */
    478      0    stevel vfsops_t *
    479      0    stevel vfs_getops(vfs_t *vfsp)
    480      0    stevel {
    481      0    stevel 	vfsops_t	*op;
    482      0    stevel 
    483      0    stevel 	ASSERT(vfsp != NULL);
    484      0    stevel 
    485      0    stevel 	op = vfsp->vfs_op;
    486      0    stevel 	membar_consumer();
    487   5331       amw 	if (vfsp->vfs_femhead == NULL && op == vfsp->vfs_op) {
    488      0    stevel 		return (op);
    489      0    stevel 	} else {
    490      0    stevel 		return (fsem_getvfsops(vfsp));
    491      0    stevel 	}
    492      0    stevel }
    493      0    stevel 
    494      0    stevel /*
    495      0    stevel  * Returns non-zero (1) if the vfsops matches that of the vfs.
    496      0    stevel  * Returns zero (0) if not.
    497      0    stevel  */
    498      0    stevel int
    499      0    stevel vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops)
    500      0    stevel {
    501      0    stevel 	return (vfs_getops(vfsp) == vfsops);
    502      0    stevel }
    503      0    stevel 
    504      0    stevel /*
    505      0    stevel  * Returns non-zero (1) if the file system has installed a non-default,
    506      0    stevel  * non-error vfs_sync routine.  Returns zero (0) otherwise.
    507      0    stevel  */
    508      0    stevel int
    509      0    stevel vfs_can_sync(vfs_t *vfsp)
    510      0    stevel {
    511      0    stevel 	/* vfs_sync() routine is not the default/error function */
    512      0    stevel 	return (vfs_getops(vfsp)->vfs_sync != fs_sync);
    513      0    stevel }
    514      0    stevel 
    515      0    stevel /*
    516      0    stevel  * Initialize a vfs structure.
    517      0    stevel  */
    518      0    stevel void
    519      0    stevel vfs_init(vfs_t *vfsp, vfsops_t *op, void *data)
    520      0    stevel {
    521   5331       amw 	/* Other initialization has been moved to vfs_alloc() */
    522      0    stevel 	vfsp->vfs_count = 0;
    523      0    stevel 	vfsp->vfs_next = vfsp;
    524      0    stevel 	vfsp->vfs_prev = vfsp;
    525      0    stevel 	vfsp->vfs_zone_next = vfsp;
    526      0    stevel 	vfsp->vfs_zone_prev = vfsp;
    527   6734   johnlev 	vfsp->vfs_lofi_minor = 0;
    528   5331       amw 	sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL);
    529   5331       amw 	vfsimpl_setup(vfsp);
    530      0    stevel 	vfsp->vfs_data = (data);
    531      0    stevel 	vfs_setops((vfsp), (op));
    532      0    stevel }
    533      0    stevel 
    534   1925       rsb /*
    535   1925       rsb  * Allocate and initialize the vfs implementation private data
    536   1925       rsb  * structure, vfs_impl_t.
    537   1925       rsb  */
    538   1925       rsb void
    539   1925       rsb vfsimpl_setup(vfs_t *vfsp)
    540   1925       rsb {
    541   5331       amw 	int i;
    542   5331       amw 
    543   5331       amw 	if (vfsp->vfs_implp != NULL) {
    544   5331       amw 		return;
    545   5331       amw 	}
    546   5331       amw 
    547   1925       rsb 	vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP);
    548   5331       amw 	/* Note that these are #define'd in vfs.h */
    549   1925       rsb 	vfsp->vfs_vskap = NULL;
    550   1925       rsb 	vfsp->vfs_fstypevsp = NULL;
    551   5331       amw 
    552   5331       amw 	/* Set size of counted array, then zero the array */
    553   5331       amw 	vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1;
    554   5331       amw 	for (i = 1; i <  VFS_FEATURE_MAXSZ; i++) {
    555   5331       amw 		vfsp->vfs_featureset[i] = 0;
    556   5331       amw 	}
    557   1925       rsb }
    558   1925       rsb 
    559   1925       rsb /*
    560   1925       rsb  * Release the vfs_impl_t structure, if it exists. Some unbundled
    561   1925       rsb  * filesystems may not use the newer version of vfs and thus
    562   1925       rsb  * would not contain this implementation private data structure.
    563   1925       rsb  */
    564   1925       rsb void
    565   1925       rsb vfsimpl_teardown(vfs_t *vfsp)
    566   1925       rsb {
    567   1925       rsb 	vfs_impl_t	*vip = vfsp->vfs_implp;
    568   1925       rsb 
    569   1925       rsb 	if (vip == NULL)
    570   1925       rsb 		return;
    571   1925       rsb 
    572   1925       rsb 	kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t));
    573   1925       rsb 	vfsp->vfs_implp = NULL;
    574   1925       rsb }
    575      0    stevel 
    576      0    stevel /*
    577      0    stevel  * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs,
    578      0    stevel  * fstatvfs, and sysfs moved to common/syscall.
    579      0    stevel  */
    580      0    stevel 
    581      0    stevel /*
    582      0    stevel  * Update every mounted file system.  We call the vfs_sync operation of
    583      0    stevel  * each file system type, passing it a NULL vfsp to indicate that all
    584      0    stevel  * mounted file systems of that type should be updated.
    585      0    stevel  */
    586      0    stevel void
    587      0    stevel vfs_sync(int flag)
    588      0    stevel {
    589      0    stevel 	struct vfssw *vswp;
    590      0    stevel 	RLOCK_VFSSW();
    591      0    stevel 	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
    592      0    stevel 		if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
    593      0    stevel 			vfs_refvfssw(vswp);
    594      0    stevel 			RUNLOCK_VFSSW();
    595      0    stevel 			(void) (*vswp->vsw_vfsops.vfs_sync)(NULL, flag,
    596      0    stevel 			    CRED());
    597      0    stevel 			vfs_unrefvfssw(vswp);
    598      0    stevel 			RLOCK_VFSSW();
    599      0    stevel 		}
    600      0    stevel 	}
    601      0    stevel 	RUNLOCK_VFSSW();
    602      0    stevel }
    603      0    stevel 
/*
 * Convenience entry point: update every mounted file system by calling
 * vfs_sync() with a flag value of 0.
 */
void
sync(void)
{
	vfs_sync(0);
}
    609      0    stevel 
    610      0    stevel /*
    611      0    stevel  * External routines.
    612      0    stevel  */
    613      0    stevel 
    614      0    stevel krwlock_t vfssw_lock;	/* lock accesses to vfssw */
    615      0    stevel 
    616      0    stevel /*
    617      0    stevel  * Lock for accessing the vfs linked list.  Initialized in vfs_mountroot(),
    618      0    stevel  * but otherwise should be accessed only via vfs_list_lock() and
    619      0    stevel  * vfs_list_unlock().  Also used to protect the timestamp for mods to the list.
    620      0    stevel  */
    621      0    stevel static krwlock_t vfslist;
    622      0    stevel 
    623      0    stevel /*
    624      0    stevel  * Mount devfs on /devices. This is done right after root is mounted
    625      0    stevel  * to provide device access support for the system
    626      0    stevel  */
    627      0    stevel static void
    628      0    stevel vfs_mountdevices(void)
    629      0    stevel {
    630      0    stevel 	struct vfssw *vsw;
    631      0    stevel 	struct vnode *mvp;
    632      0    stevel 	struct mounta mounta = {	/* fake mounta for devfs_mount() */
    633      0    stevel 		NULL,
    634      0    stevel 		NULL,
    635      0    stevel 		MS_SYSSPACE,
    636      0    stevel 		NULL,
    637      0    stevel 		NULL,
    638      0    stevel 		0,
    639      0    stevel 		NULL,
    640      0    stevel 		0
    641      0    stevel 	};
    642      0    stevel 
    643      0    stevel 	/*
    644      0    stevel 	 * _init devfs module to fill in the vfssw
    645      0    stevel 	 */
    646      0    stevel 	if (modload("fs", "devfs") == -1)
    647   3446       mrj 		panic("Cannot _init devfs module");
    648      0    stevel 
    649      0    stevel 	/*
    650      0    stevel 	 * Hold vfs
    651      0    stevel 	 */
    652      0    stevel 	RLOCK_VFSSW();
    653      0    stevel 	vsw = vfs_getvfsswbyname("devfs");
    654      0    stevel 	VFS_INIT(&devices, &vsw->vsw_vfsops, NULL);
    655      0    stevel 	VFS_HOLD(&devices);
    656      0    stevel 
    657      0    stevel 	/*
    658      0    stevel 	 * Locate mount point
    659      0    stevel 	 */
    660      0    stevel 	if (lookupname("/devices", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
    661   3446       mrj 		panic("Cannot find /devices");
    662      0    stevel 
    663      0    stevel 	/*
    664      0    stevel 	 * Perform the mount of /devices
    665      0    stevel 	 */
    666      0    stevel 	if (VFS_MOUNT(&devices, mvp, &mounta, CRED()))
    667   3446       mrj 		panic("Cannot mount /devices");
    668      0    stevel 
    669      0    stevel 	RUNLOCK_VFSSW();
    670      0    stevel 
    671      0    stevel 	/*
    672      0    stevel 	 * Set appropriate members and add to vfs list for mnttab display
    673      0    stevel 	 */
    674      0    stevel 	vfs_setresource(&devices, "/devices");
    675      0    stevel 	vfs_setmntpoint(&devices, "/devices");
    676      0    stevel 
    677      0    stevel 	/*
    678      0    stevel 	 * Hold the root of /devices so it won't go away
    679      0    stevel 	 */
    680      0    stevel 	if (VFS_ROOT(&devices, &devicesdir))
    681   3446       mrj 		panic("vfs_mountdevices: not devices root");
    682      0    stevel 
    683      0    stevel 	if (vfs_lock(&devices) != 0) {
    684   2621     llai1 		VN_RELE(devicesdir);
    685      0    stevel 		cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /devices");
    686      0    stevel 		return;
    687      0    stevel 	}
    688      0    stevel 
    689      0    stevel 	if (vn_vfswlock(mvp) != 0) {
    690      0    stevel 		vfs_unlock(&devices);
    691   2621     llai1 		VN_RELE(devicesdir);
    692      0    stevel 		cmn_err(CE_NOTE, "Cannot acquire vfswlock of /devices");
    693      0    stevel 		return;
    694      0    stevel 	}
    695      0    stevel 
    696      0    stevel 	vfs_add(mvp, &devices, 0);
    697      0    stevel 	vn_vfsunlock(mvp);
    698      0    stevel 	vfs_unlock(&devices);
    699   2621     llai1 	VN_RELE(devicesdir);
    700   2621     llai1 }
    701   2621     llai1 
    702   2621     llai1 /*
    703   2621     llai1  * mount the first instance of /dev  to root and remain mounted
    704   2621     llai1  */
    705   2621     llai1 static void
    706   2621     llai1 vfs_mountdev1(void)
    707   2621     llai1 {
    708   2621     llai1 	struct vfssw *vsw;
    709   2621     llai1 	struct vnode *mvp;
    710   2621     llai1 	struct mounta mounta = {	/* fake mounta for sdev_mount() */
    711   2621     llai1 		NULL,
    712   2621     llai1 		NULL,
    713   2621     llai1 		MS_SYSSPACE | MS_OVERLAY,
    714   2621     llai1 		NULL,
    715   2621     llai1 		NULL,
    716   2621     llai1 		0,
    717   2621     llai1 		NULL,
    718   2621     llai1 		0
    719   2621     llai1 	};
    720   2621     llai1 
    721   2621     llai1 	/*
    722   2621     llai1 	 * _init dev module to fill in the vfssw
    723   2621     llai1 	 */
    724   2621     llai1 	if (modload("fs", "dev") == -1)
    725   2621     llai1 		cmn_err(CE_PANIC, "Cannot _init dev module\n");
    726   2621     llai1 
    727   2621     llai1 	/*
    728   2621     llai1 	 * Hold vfs
    729   2621     llai1 	 */
    730   2621     llai1 	RLOCK_VFSSW();
    731   2621     llai1 	vsw = vfs_getvfsswbyname("dev");
    732   2621     llai1 	VFS_INIT(&dev, &vsw->vsw_vfsops, NULL);
    733   2621     llai1 	VFS_HOLD(&dev);
    734   2621     llai1 
    735   2621     llai1 	/*
    736   2621     llai1 	 * Locate mount point
    737   2621     llai1 	 */
    738   2621     llai1 	if (lookupname("/dev", UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp))
    739   2621     llai1 		cmn_err(CE_PANIC, "Cannot find /dev\n");
    740   2621     llai1 
    741   2621     llai1 	/*
    742   2621     llai1 	 * Perform the mount of /dev
    743   2621     llai1 	 */
    744   2621     llai1 	if (VFS_MOUNT(&dev, mvp, &mounta, CRED()))
    745   2621     llai1 		cmn_err(CE_PANIC, "Cannot mount /dev 1\n");
    746   2621     llai1 
    747   2621     llai1 	RUNLOCK_VFSSW();
    748   2621     llai1 
    749   2621     llai1 	/*
    750   2621     llai1 	 * Set appropriate members and add to vfs list for mnttab display
    751   2621     llai1 	 */
    752   2621     llai1 	vfs_setresource(&dev, "/dev");
    753   2621     llai1 	vfs_setmntpoint(&dev, "/dev");
    754   2621     llai1 
    755   2621     llai1 	/*
    756   2621     llai1 	 * Hold the root of /dev so it won't go away
    757   2621     llai1 	 */
    758   2621     llai1 	if (VFS_ROOT(&dev, &devdir))
    759   2621     llai1 		cmn_err(CE_PANIC, "vfs_mountdev1: not dev root");
    760   2621     llai1 
    761   2621     llai1 	if (vfs_lock(&dev) != 0) {
    762   2621     llai1 		VN_RELE(devdir);
    763   2621     llai1 		cmn_err(CE_NOTE, "Cannot acquire vfs_lock of /dev");
    764   2621     llai1 		return;
    765   2621     llai1 	}
    766   2621     llai1 
    767   2621     llai1 	if (vn_vfswlock(mvp) != 0) {
    768   2621     llai1 		vfs_unlock(&dev);
    769   2621     llai1 		VN_RELE(devdir);
    770   2621     llai1 		cmn_err(CE_NOTE, "Cannot acquire vfswlock of /dev");
    771   2621     llai1 		return;
    772   2621     llai1 	}
    773   2621     llai1 
    774   2621     llai1 	vfs_add(mvp, &dev, 0);
    775   2621     llai1 	vn_vfsunlock(mvp);
    776   2621     llai1 	vfs_unlock(&dev);
    777   2621     llai1 	VN_RELE(devdir);
    778      0    stevel }
    779      0    stevel 
    780      0    stevel /*
    781      0    stevel  * Mount required filesystem. This is done right after root is mounted.
    782      0    stevel  */
    783      0    stevel static void
    784      0    stevel vfs_mountfs(char *module, char *spec, char *path)
    785      0    stevel {
    786      0    stevel 	struct vnode *mvp;
    787      0    stevel 	struct mounta mounta;
    788      0    stevel 	vfs_t *vfsp;
    789      0    stevel 
    790      0    stevel 	mounta.flags = MS_SYSSPACE | MS_DATA;
    791      0    stevel 	mounta.fstype = module;
    792      0    stevel 	mounta.spec = spec;
    793      0    stevel 	mounta.dir = path;
    794      0    stevel 	if (lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &mvp)) {
    795   3446       mrj 		cmn_err(CE_WARN, "Cannot find %s", path);
    796      0    stevel 		return;
    797      0    stevel 	}
    798      0    stevel 	if (domount(NULL, &mounta, mvp, CRED(), &vfsp))
    799   3446       mrj 		cmn_err(CE_WARN, "Cannot mount %s", path);
    800      0    stevel 	else
    801      0    stevel 		VFS_RELE(vfsp);
    802      0    stevel 	VN_RELE(mvp);
    803      0    stevel }
    804      0    stevel 
    805      0    stevel /*
    806      0    stevel  * vfs_mountroot is called by main() to mount the root filesystem.
    807      0    stevel  */
    808      0    stevel void
    809      0    stevel vfs_mountroot(void)
    810      0    stevel {
    811      0    stevel 	struct vnode	*rvp = NULL;
    812      0    stevel 	char		*path;
    813      0    stevel 	size_t		plen;
    814   1488       rsb 	struct vfssw	*vswp;
    815  11173  Jonathan 	proc_t		*p;
    816      0    stevel 
    817      0    stevel 	rw_init(&vfssw_lock, NULL, RW_DEFAULT, NULL);
    818      0    stevel 	rw_init(&vfslist, NULL, RW_DEFAULT, NULL);
    819      0    stevel 
    820      0    stevel 	/*
    821      0    stevel 	 * Alloc the vfs hash bucket array and locks
    822      0    stevel 	 */
    823      0    stevel 	rvfs_list = kmem_zalloc(vfshsz * sizeof (rvfs_t), KM_SLEEP);
    824      0    stevel 
    825      0    stevel 	/*
    826      0    stevel 	 * Call machine-dependent routine "rootconf" to choose a root
    827      0    stevel 	 * file system type.
    828      0    stevel 	 */
    829      0    stevel 	if (rootconf())
    830   3446       mrj 		panic("vfs_mountroot: cannot mount root");
    831      0    stevel 	/*
    832      0    stevel 	 * Get vnode for '/'.  Set up rootdir, u.u_rdir and u.u_cdir
    833      0    stevel 	 * to point to it.  These are used by lookuppn() so that it
    834      0    stevel 	 * knows where to start from ('/' or '.').
    835      0    stevel 	 */
    836      0    stevel 	vfs_setmntpoint(rootvfs, "/");
    837      0    stevel 	if (VFS_ROOT(rootvfs, &rootdir))
    838   3446       mrj 		panic("vfs_mountroot: no root vnode");
    839  11173  Jonathan 
    840  11173  Jonathan 	/*
    841  11173  Jonathan 	 * At this point, the process tree consists of p0 and possibly some
    842  11173  Jonathan 	 * direct children of p0.  (i.e. there are no grandchildren)
    843  11173  Jonathan 	 *
    844  11173  Jonathan 	 * Walk through them all, setting their current directory.
    845  11173  Jonathan 	 */
    846  11173  Jonathan 	mutex_enter(&pidlock);
    847  11173  Jonathan 	for (p = practive; p != NULL; p = p->p_next) {
    848  11173  Jonathan 		ASSERT(p == &p0 || p->p_parent == &p0);
    849  11173  Jonathan 
    850  11173  Jonathan 		PTOU(p)->u_cdir = rootdir;
    851  11173  Jonathan 		VN_HOLD(PTOU(p)->u_cdir);
    852  11173  Jonathan 		PTOU(p)->u_rdir = NULL;
    853  11173  Jonathan 	}
    854  11173  Jonathan 	mutex_exit(&pidlock);
    855      0    stevel 
    856      0    stevel 	/*
    857      0    stevel 	 * Setup the global zone's rootvp, now that it exists.
    858      0    stevel 	 */
    859      0    stevel 	global_zone->zone_rootvp = rootdir;
    860      0    stevel 	VN_HOLD(global_zone->zone_rootvp);
    861      0    stevel 
    862      0    stevel 	/*
    863      0    stevel 	 * Notify the module code that it can begin using the
    864      0    stevel 	 * root filesystem instead of the boot program's services.
    865      0    stevel 	 */
    866      0    stevel 	modrootloaded = 1;
    867   6423   gw25295 
    868   6423   gw25295 	/*
    869   6423   gw25295 	 * Special handling for a ZFS root file system.
    870   6423   gw25295 	 */
    871   6423   gw25295 	zfs_boot_init();
    872   6423   gw25295 
    873      0    stevel 	/*
    874      0    stevel 	 * Set up mnttab information for root
    875      0    stevel 	 */
    876      0    stevel 	vfs_setresource(rootvfs, rootfs.bo_name);
    877      0    stevel 
    878      0    stevel 	/*
    879      0    stevel 	 * Notify cluster software that the root filesystem is available.
    880      0    stevel 	 */
    881      0    stevel 	clboot_mountroot();
    882   1488       rsb 
    883   1488       rsb 	/* Now that we're all done with the root FS, set up its vopstats */
    884   1488       rsb 	if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) != NULL) {
    885   1488       rsb 		/* Set flag for statistics collection */
    886   1488       rsb 		if (vswp->vsw_flag & VSW_STATS) {
    887   1520       rsb 			initialize_vopstats(&rootvfs->vfs_vopstats);
    888   1488       rsb 			rootvfs->vfs_flag |= VFS_STATS;
    889   1520       rsb 			rootvfs->vfs_fstypevsp =
    890   1520       rsb 			    get_fstype_vopstats(rootvfs, vswp);
    891   1520       rsb 			rootvfs->vfs_vskap = get_vskstat_anchor(rootvfs);
    892   1488       rsb 		}
    893   1488       rsb 		vfs_unrefvfssw(vswp);
    894   1488       rsb 	}
    895      0    stevel 
    896      0    stevel 	/*
    897   2621     llai1 	 * Mount /devices, /dev instance 1, /system/contract, /etc/mnttab,
    898   3957  th199096 	 * /etc/svc/volatile, /etc/dfs/sharetab, /system/object, and /proc.
    899      0    stevel 	 */
    900      0    stevel 	vfs_mountdevices();
    901   2621     llai1 	vfs_mountdev1();
    902      0    stevel 
    903      0    stevel 	vfs_mountfs("ctfs", "ctfs", CTFS_ROOT);
    904      0    stevel 	vfs_mountfs("proc", "/proc", "/proc");
    905      0    stevel 	vfs_mountfs("mntfs", "/etc/mnttab", "/etc/mnttab");
    906      0    stevel 	vfs_mountfs("tmpfs", "/etc/svc/volatile", "/etc/svc/volatile");
    907      0    stevel 	vfs_mountfs("objfs", "objfs", OBJFS_ROOT);
    908   3957  th199096 
    909   3957  th199096 	if (getzoneid() == GLOBAL_ZONEID) {
    910   3957  th199096 		vfs_mountfs("sharefs", "sharefs", "/etc/dfs/sharetab");
    911   3957  th199096 	}
    912      0    stevel 
    913      0    stevel #ifdef __sparc
    914      0    stevel 	/*
    915      0    stevel 	 * This bit of magic can go away when we convert sparc to
    916      0    stevel 	 * the new boot architecture based on ramdisk.
    917      0    stevel 	 *
    918      0    stevel 	 * Booting off a mirrored root volume:
    919      0    stevel 	 * At this point, we have booted and mounted root on a
    920      0    stevel 	 * single component of the mirror.  Complete the boot
    921      0    stevel 	 * by configuring SVM and converting the root to the
    922      0    stevel 	 * dev_t of the mirrored root device.  This dev_t conversion
    923      0    stevel 	 * only works because the underlying device doesn't change.
    924      0    stevel 	 */
    925      0    stevel 	if (root_is_svm) {
    926      0    stevel 		if (svm_rootconf()) {
    927   3446       mrj 			panic("vfs_mountroot: cannot remount root");
    928      0    stevel 		}
    929      0    stevel 
    930      0    stevel 		/*
    931      0    stevel 		 * mnttab should reflect the new root device
    932      0    stevel 		 */
    933      0    stevel 		vfs_lock_wait(rootvfs);
    934      0    stevel 		vfs_setresource(rootvfs, rootfs.bo_name);
    935      0    stevel 		vfs_unlock(rootvfs);
    936      0    stevel 	}
    937      0    stevel #endif /* __sparc */
    938      0    stevel 
    939      0    stevel 	/*
    940      0    stevel 	 * Look up the root device via devfs so that a dv_node is
    941      0    stevel 	 * created for it. The vnode is never VN_RELE()ed.
    942      0    stevel 	 * We allocate more than MAXPATHLEN so that the
    943      0    stevel 	 * buffer passed to i_ddi_prompath_to_devfspath() is
    944      0    stevel 	 * exactly MAXPATHLEN (the function expects a buffer
    945      0    stevel 	 * of that length).
    946      0    stevel 	 */
    947      0    stevel 	plen = strlen("/devices");
    948      0    stevel 	path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
    949      0    stevel 	(void) strcpy(path, "/devices");
    950      0    stevel 
    951      0    stevel 	if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
    952      0    stevel 	    != DDI_SUCCESS ||
    953      0    stevel 	    lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
    954      0    stevel 
    955      0    stevel 		/* NUL terminate in case "path" has garbage */
    956      0    stevel 		path[plen + MAXPATHLEN - 1] = '\0';
    957      0    stevel #ifdef	DEBUG
    958      0    stevel 		cmn_err(CE_WARN, "!Cannot lookup root device: %s", path);
    959      0    stevel #endif
    960      0    stevel 	}
    961      0    stevel 	kmem_free(path, plen + MAXPATHLEN);
    962   4863     praks 	vfs_mnttabvp_setup();
    963    994     evanl }
    964    994     evanl 
    965    994     evanl /*
    966    994     evanl  * If remount failed and we're in a zone we need to check for the zone
    967    994     evanl  * root path and strip it before the call to vfs_setpath().
    968    994     evanl  *
    969    994     evanl  * If strpath doesn't begin with the zone_rootpath the original
    970    994     evanl  * strpath is returned unchanged.
    971    994     evanl  */
    972    994     evanl static const char *
    973    994     evanl stripzonepath(const char *strpath)
    974    994     evanl {
    975    994     evanl 	char *str1, *str2;
    976    994     evanl 	int i;
    977    994     evanl 	zone_t *zonep = curproc->p_zone;
    978    994     evanl 
    979    994     evanl 	if (zonep->zone_rootpath == NULL || strpath == NULL) {
    980    994     evanl 		return (NULL);
    981    994     evanl 	}
    982    994     evanl 
    983    994     evanl 	/*
    984    994     evanl 	 * we check for the end of the string at one past the
    985    994     evanl 	 * current position because the zone_rootpath always
    986    994     evanl 	 * ends with "/" but we don't want to strip that off.
    987    994     evanl 	 */
    988    994     evanl 	str1 = zonep->zone_rootpath;
    989    994     evanl 	str2 = (char *)strpath;
    990    994     evanl 	ASSERT(str1[0] != '\0');
    991    994     evanl 	for (i = 0; str1[i + 1] != '\0'; i++) {
    992    994     evanl 		if (str1[i] != str2[i])
    993    994     evanl 			return ((char *)strpath);
    994    994     evanl 	}
    995    994     evanl 	return (&str2[i]);
    996   6734   johnlev }
    997   6734   johnlev 
    998   6734   johnlev /*
    999   6734   johnlev  * Check to see if our "block device" is actually a file.  If so,
   1000   6734   johnlev  * automatically add a lofi device, and keep track of this fact.
   1001   6734   johnlev  */
   1002   6734   johnlev static int
   1003   6734   johnlev lofi_add(const char *fsname, struct vfs *vfsp,
   1004   6734   johnlev     mntopts_t *mntopts, struct mounta *uap)
   1005   6734   johnlev {
   1006   6734   johnlev 	int fromspace = (uap->flags & MS_SYSSPACE) ?
   1007   6734   johnlev 	    UIO_SYSSPACE : UIO_USERSPACE;
   1008   6734   johnlev 	struct lofi_ioctl *li = NULL;
   1009   6734   johnlev 	struct vnode *vp = NULL;
   1010   6734   johnlev 	struct pathname	pn = { NULL };
   1011   6734   johnlev 	ldi_ident_t ldi_id;
   1012   6734   johnlev 	ldi_handle_t ldi_hdl;
   1013   6855   johnlev 	vfssw_t *vfssw;
   1014   6734   johnlev 	int minor;
   1015   6734   johnlev 	int err = 0;
   1016   6734   johnlev 
   1017   6855   johnlev 	if (fsname == NULL ||
   1018   6855   johnlev 	    (vfssw = vfs_getvfssw(fsname)) == NULL)
   1019   6855   johnlev 		return (0);
   1020   6855   johnlev 
   1021   6855   johnlev 	if (!(vfssw->vsw_flag & VSW_CANLOFI)) {
   1022   6855   johnlev 		vfs_unrefvfssw(vfssw);
   1023   6855   johnlev 		return (0);
   1024   6855   johnlev 	}
   1025   6855   johnlev 
   1026   6855   johnlev 	vfs_unrefvfssw(vfssw);
   1027   6855   johnlev 	vfssw = NULL;
   1028   6734   johnlev 
   1029   6734   johnlev 	if (pn_get(uap->spec, fromspace, &pn) != 0)
   1030   6734   johnlev 		return (0);
   1031   6734   johnlev 
   1032   6734   johnlev 	if (lookupname(uap->spec, fromspace, FOLLOW, NULL, &vp) != 0)
   1033   6734   johnlev 		goto out;
   1034   6734   johnlev 
   1035   6734   johnlev 	if (vp->v_type != VREG)
   1036   6734   johnlev 		goto out;
   1037   6734   johnlev 
   1038   6734   johnlev 	/* OK, this is a lofi mount. */
   1039   6734   johnlev 
   1040   6734   johnlev 	if ((uap->flags & (MS_REMOUNT|MS_GLOBAL)) ||
   1041   6734   johnlev 	    vfs_optionisset_nolock(mntopts, MNTOPT_SUID, NULL) ||
   1042   6734   johnlev 	    vfs_optionisset_nolock(mntopts, MNTOPT_SETUID, NULL) ||
   1043   6734   johnlev 	    vfs_optionisset_nolock(mntopts, MNTOPT_DEVICES, NULL)) {
   1044   6734   johnlev 		err = EINVAL;
   1045   6734   johnlev 		goto out;
   1046   6734   johnlev 	}
   1047   6734   johnlev 
   1048   6734   johnlev 	ldi_id = ldi_ident_from_anon();
   1049   6734   johnlev 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
   1050   8081      Dina 	(void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN);
   1051   6734   johnlev 
   1052   6734   johnlev 	/*
   1053   6734   johnlev 	 * The lofi control node is currently exclusive-open.  We'd like
   1054   6734   johnlev 	 * to improve this, but in the meantime, we'll loop waiting for
   1055   6734   johnlev 	 * access.
   1056   6734   johnlev 	 */
   1057   6734   johnlev 	for (;;) {
   1058   6734   johnlev 		err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
   1059   6734   johnlev 		    kcred, &ldi_hdl, ldi_id);
   1060   6734   johnlev 
   1061   6734   johnlev 		if (err != EBUSY)
   1062   6734   johnlev 			break;
   1063   6734   johnlev 
   1064   6734   johnlev 		if ((err = delay_sig(hz / 8)) == EINTR)
   1065   6734   johnlev 			break;
   1066   6734   johnlev 	}
   1067   6734   johnlev 
   1068   6734   johnlev 	if (err)
   1069   6734   johnlev 		goto out2;
   1070   6734   johnlev 
   1071   6734   johnlev 	err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
   1072   6734   johnlev 	    FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor);
   1073   6734   johnlev 
   1074   6734   johnlev 	(void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
   1075   6734   johnlev 
   1076   6734   johnlev 	if (!err)
   1077   6734   johnlev 		vfsp->vfs_lofi_minor = minor;
   1078   6734   johnlev 
   1079   6734   johnlev out2:
   1080   6734   johnlev 	ldi_ident_release(ldi_id);
   1081   6734   johnlev out:
   1082   6734   johnlev 	if (li != NULL)
   1083   6734   johnlev 		kmem_free(li, sizeof (*li));
   1084   6734   johnlev 	if (vp != NULL)
   1085   6734   johnlev 		VN_RELE(vp);
   1086   6734   johnlev 	pn_free(&pn);
   1087   6734   johnlev 	return (err);
   1088   6734   johnlev }
   1089   6734   johnlev 
   1090   6734   johnlev static void
   1091   6734   johnlev lofi_remove(struct vfs *vfsp)
   1092   6734   johnlev {
   1093   6734   johnlev 	struct lofi_ioctl *li = NULL;
   1094   6734   johnlev 	ldi_ident_t ldi_id;
   1095   6734   johnlev 	ldi_handle_t ldi_hdl;
   1096   6734   johnlev 	int err;
   1097   6734   johnlev 
   1098   6734   johnlev 	if (vfsp->vfs_lofi_minor == 0)
   1099   6734   johnlev 		return;
   1100   6734   johnlev 
   1101   6734   johnlev 	ldi_id = ldi_ident_from_anon();
   1102   6734   johnlev 
   1103   6734   johnlev 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
   1104   6734   johnlev 	li->li_minor = vfsp->vfs_lofi_minor;
   1105   6734   johnlev 	li->li_cleanup = B_TRUE;
   1106   6734   johnlev 
   1107   6734   johnlev 	do {
   1108   6734   johnlev 		err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL,
   1109   6734   johnlev 		    kcred, &ldi_hdl, ldi_id);
   1110   6734   johnlev 	} while (err == EBUSY);
   1111   6734   johnlev 
   1112   6734   johnlev 	if (err)
   1113   6734   johnlev 		goto out;
   1114   6734   johnlev 
   1115   6734   johnlev 	err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li,
   1116   6734   johnlev 	    FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL);
   1117   6734   johnlev 
   1118   6734   johnlev 	(void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred);
   1119   6734   johnlev 
   1120   6734   johnlev 	if (!err)
   1121   6734   johnlev 		vfsp->vfs_lofi_minor = 0;
   1122   6734   johnlev 
   1123   6734   johnlev out:
   1124   6734   johnlev 	ldi_ident_release(ldi_id);
   1125   6734   johnlev 	if (li != NULL)
   1126   6734   johnlev 		kmem_free(li, sizeof (*li));
   1127      0    stevel }
   1128      0    stevel 
   1129      0    stevel /*
   1130      0    stevel  * Common mount code.  Called from the system call entry point, from autofs,
   1131   5302  th199096  * nfsv4 trigger mounts, and from pxfs.
   1132      0    stevel  *
   1133      0    stevel  * Takes the effective file system type, mount arguments, the mount point
   1134      0    stevel  * vnode, flags specifying whether the mount is a remount and whether it
   1135      0    stevel  * should be entered into the vfs list, and credentials.  Fills in its vfspp
   1136      0    stevel  * parameter with the mounted file system instance's vfs.
   1137      0    stevel  *
   1138      0    stevel  * Note that the effective file system type is specified as a string.  It may
   1139      0    stevel  * be null, in which case it's determined from the mount arguments, and may
   1140      0    stevel  * differ from the type specified in the mount arguments; this is a hook to
   1141      0    stevel  * allow interposition when instantiating file system instances.
   1142      0    stevel  *
   1143      0    stevel  * The caller is responsible for releasing its own hold on the mount point
   1144      0    stevel  * vp (this routine does its own hold when necessary).
   1145      0    stevel  * Also note that for remounts, the mount point vp should be the vnode for
   1146      0    stevel  * the root of the file system rather than the vnode that the file system
   1147      0    stevel  * is mounted on top of.
   1148      0    stevel  */
   1149      0    stevel int
   1150      0    stevel domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
   1151      0    stevel 	struct vfs **vfspp)
   1152      0    stevel {
   1153      0    stevel 	struct vfssw	*vswp;
   1154      0    stevel 	vfsops_t	*vfsops;
   1155      0    stevel 	struct vfs	*vfsp;
   1156      0    stevel 	struct vnode	*bvp;
   1157      0    stevel 	dev_t		bdev = 0;
   1158      0    stevel 	mntopts_t	mnt_mntopts;
   1159      0    stevel 	int		error = 0;
   1160      0    stevel 	int		copyout_error = 0;
   1161      0    stevel 	int		ovflags;
   1162      0    stevel 	char		*opts = uap->optptr;
   1163      0    stevel 	char		*inargs = opts;
   1164      0    stevel 	int		optlen = uap->optlen;
   1165      0    stevel 	int		remount;
   1166      0    stevel 	int		rdonly;
   1167      0    stevel 	int		nbmand = 0;
   1168      0    stevel 	int		delmip = 0;
   1169      0    stevel 	int		addmip = 0;
   1170      0    stevel 	int		splice = ((uap->flags & MS_NOSPLICE) == 0);
   1171      0    stevel 	int		fromspace = (uap->flags & MS_SYSSPACE) ?
   1172   3912     lling 	    UIO_SYSSPACE : UIO_USERSPACE;
   1173      0    stevel 	char		*resource = NULL, *mountpt = NULL;
   1174      0    stevel 	refstr_t	*oldresource, *oldmntpt;
   1175      0    stevel 	struct pathname	pn, rpn;
   1176   1520       rsb 	vsk_anchor_t	*vskap;
   1177   6734   johnlev 	char fstname[FSTYPSZ];
   1178      0    stevel 
   1179      0    stevel 	/*
   1180      0    stevel 	 * The v_flag value for the mount point vp is permanently set
   1181      0    stevel 	 * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine
   1182      0    stevel 	 * for mount point locking.
   1183      0    stevel 	 */
   1184      0    stevel 	mutex_enter(&vp->v_lock);
   1185      0    stevel 	vp->v_flag |= VVFSLOCK;
   1186      0    stevel 	mutex_exit(&vp->v_lock);
   1187      0    stevel 
   1188      0    stevel 	mnt_mntopts.mo_count = 0;
   1189      0    stevel 	/*
   1190      0    stevel 	 * Find the ops vector to use to invoke the file system-specific mount
   1191      0    stevel 	 * method.  If the fsname argument is non-NULL, use it directly.
   1192      0    stevel 	 * Otherwise, dig the file system type information out of the mount
   1193      0    stevel 	 * arguments.
   1194      0    stevel 	 *
   1195      0    stevel 	 * A side effect is to hold the vfssw entry.
   1196      0    stevel 	 *
   1197      0    stevel 	 * Mount arguments can be specified in several ways, which are
   1198      0    stevel 	 * distinguished by flag bit settings.  The preferred way is to set
   1199      0    stevel 	 * MS_OPTIONSTR, indicating an 8 argument mount with the file system
   1200      0    stevel 	 * type supplied as a character string and the last two arguments
   1201      0    stevel 	 * being a pointer to a character buffer and the size of the buffer.
   1202      0    stevel 	 * On entry, the buffer holds a null terminated list of options; on
   1203      0    stevel 	 * return, the string is the list of options the file system
   1204      0    stevel 	 * recognized. If MS_DATA is set arguments five and six point to a
   1205      0    stevel 	 * block of binary data which the file system interprets.
   1206      0    stevel 	 * A further wrinkle is that some callers don't set MS_FSS and MS_DATA
   1207      0    stevel 	 * consistently with these conventions.  To handle them, we check to
   1208      0    stevel 	 * see whether the pointer to the file system name has a numeric value
   1209      0    stevel 	 * less than 256.  If so, we treat it as an index.
   1210      0    stevel 	 */
   1211      0    stevel 	if (fsname != NULL) {
   1212      0    stevel 		if ((vswp = vfs_getvfssw(fsname)) == NULL) {
   1213      0    stevel 			return (EINVAL);
   1214      0    stevel 		}
   1215      0    stevel 	} else if (uap->flags & (MS_OPTIONSTR | MS_DATA | MS_FSS)) {
   1216      0    stevel 		size_t n;
   1217      0    stevel 		uint_t fstype;
   1218   6734   johnlev 
   1219   6734   johnlev 		fsname = fstname;
   1220      0    stevel 
   1221      0    stevel 		if ((fstype = (uintptr_t)uap->fstype) < 256) {
   1222      0    stevel 			RLOCK_VFSSW();
   1223      0    stevel 			if (fstype == 0 || fstype >= nfstype ||
   1224      0    stevel 			    !ALLOCATED_VFSSW(&vfssw[fstype])) {
   1225      0    stevel 				RUNLOCK_VFSSW();
   1226      0    stevel 				return (EINVAL);
   1227      0    stevel 			}
   1228   6734   johnlev 			(void) strcpy(fsname, vfssw[fstype].vsw_name);
   1229   6734   johnlev 			RUNLOCK_VFSSW();
   1230   6734   johnlev 			if ((vswp = vfs_getvfssw(fsname)) == NULL)
   1231      0    stevel 				return (EINVAL);
   1232      0    stevel 		} else {
   1233      0    stevel 			/*
   1234      0    stevel 			 * Handle either kernel or user address space.
   1235      0    stevel 			 */
   1236      0    stevel 			if (uap->flags & MS_SYSSPACE) {
   1237   6734   johnlev 				error = copystr(uap->fstype, fsname,
   1238      0    stevel 				    FSTYPSZ, &n);
   1239      0    stevel 			} else {
   1240   6734   johnlev 				error = copyinstr(uap->fstype, fsname,
   1241      0    stevel 				    FSTYPSZ, &n);
   1242      0    stevel 			}
   1243      0    stevel 			if (error) {
   1244      0    stevel 				if (error == ENAMETOOLONG)
   1245      0    stevel 					return (EINVAL);
   1246      0    stevel 				return (error);
   1247      0    stevel 			}
   1248   6734   johnlev 			if ((vswp = vfs_getvfssw(fsname)) == NULL)
   1249      0    stevel 				return (EINVAL);
   1250      0    stevel 		}
   1251      0    stevel 	} else {
   1252      0    stevel 		if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL)
   1253      0    stevel 			return (EINVAL);
   1254      0    stevel 	}
   1255      0    stevel 	if (!VFS_INSTALLED(vswp))
   1256      0    stevel 		return (EINVAL);
   1257      0    stevel 	vfsops = &vswp->vsw_vfsops;
   1258      0    stevel 
   1259      0    stevel 	vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts);
   1260      0    stevel 	/*
   1261      0    stevel 	 * Fetch mount options and parse them for generic vfs options
   1262      0    stevel 	 */
   1263      0    stevel 	if (uap->flags & MS_OPTIONSTR) {
   1264      0    stevel 		/*
   1265      0    stevel 		 * Limit the buffer size
   1266      0    stevel 		 */
   1267      0    stevel 		if (optlen < 0 || optlen > MAX_MNTOPT_STR) {
   1268      0    stevel 			error = EINVAL;
   1269      0    stevel 			goto errout;
   1270      0    stevel 		}
   1271      0    stevel 		if ((uap->flags & MS_SYSSPACE) == 0) {
   1272      0    stevel 			inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
   1273      0    stevel 			inargs[0] = '\0';
   1274      0    stevel 			if (optlen) {
   1275      0    stevel 				error = copyinstr(opts, inargs, (size_t)optlen,
   1276   3912     lling 				    NULL);
   1277      0    stevel 				if (error) {
   1278      0    stevel 					goto errout;
   1279      0    stevel 				}
   1280      0    stevel 			}
   1281      0    stevel 		}
   1282      0    stevel 		vfs_parsemntopts(&mnt_mntopts, inargs, 0);
   1283      0    stevel 	}
   1284      0    stevel 	/*
   1285      0    stevel 	 * Flag bits override the options string.
   1286      0    stevel 	 */
   1287      0    stevel 	if (uap->flags & MS_REMOUNT)
   1288      0    stevel 		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0);
   1289      0    stevel 	if (uap->flags & MS_RDONLY)
   1290      0    stevel 		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0);
   1291      0    stevel 	if (uap->flags & MS_NOSUID)
   1292      0    stevel 		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
   1293      0    stevel 
   1294      0    stevel 	/*
   1295      0    stevel 	 * Check if this is a remount; must be set in the option string and
   1296      0    stevel 	 * the file system must support a remount option.
   1297      0    stevel 	 */
   1298      0    stevel 	if (remount = vfs_optionisset_nolock(&mnt_mntopts,
   1299      0    stevel 	    MNTOPT_REMOUNT, NULL)) {
   1300      0    stevel 		if (!(vswp->vsw_flag & VSW_CANREMOUNT)) {
   1301      0    stevel 			error = ENOTSUP;
   1302      0    stevel 			goto errout;
   1303      0    stevel 		}
   1304      0    stevel 		uap->flags |= MS_REMOUNT;
   1305      0    stevel 	}
   1306      0    stevel 
   1307      0    stevel 	/*
   1308      0    stevel 	 * uap->flags and vfs_optionisset() should agree.
   1309      0    stevel 	 */
   1310      0    stevel 	if (rdonly = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) {
   1311      0    stevel 		uap->flags |= MS_RDONLY;
   1312      0    stevel 	}
   1313      0    stevel 	if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) {
   1314      0    stevel 		uap->flags |= MS_NOSUID;
   1315      0    stevel 	}
   1316      0    stevel 	nbmand = vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NBMAND, NULL);
   1317      0    stevel 	ASSERT(splice || !remount);
   1318      0    stevel 	/*
   1319      0    stevel 	 * If we are splicing the fs into the namespace,
   1320      0    stevel 	 * perform mount point checks.
   1321      0    stevel 	 *
   1322      0    stevel 	 * We want to resolve the path for the mount point to eliminate
   1323      0    stevel 	 * '.' and ".." and symlinks in mount points; we can't do the
   1324      0    stevel 	 * same for the resource string, since it would turn
   1325      0    stevel 	 * "/dev/dsk/c0t0d0s0" into "/devices/pci@...".  We need to do
   1326      0    stevel 	 * this before grabbing vn_vfswlock(), because otherwise we
   1327      0    stevel 	 * would deadlock with lookuppn().
   1328      0    stevel 	 */
   1329      0    stevel 	if (splice) {
   1330      0    stevel 		ASSERT(vp->v_count > 0);
   1331      0    stevel 
   1332      0    stevel 		/*
   1333      0    stevel 		 * Pick up mount point and device from appropriate space.
   1334      0    stevel 		 */
   1335      0    stevel 		if (pn_get(uap->spec, fromspace, &pn) == 0) {
   1336      0    stevel 			resource = kmem_alloc(pn.pn_pathlen + 1,
   1337      0    stevel 			    KM_SLEEP);
   1338      0    stevel 			(void) strcpy(resource, pn.pn_path);
   1339      0    stevel 			pn_free(&pn);
   1340      0    stevel 		}
   1341      0    stevel 		/*
   1342      0    stevel 		 * Do a lookupname prior to taking the
   1343      0    stevel 		 * writelock. Mark this as completed if
   1344      0    stevel 		 * successful for later cleanup and addition to
   1345      0    stevel 		 * the mount in progress table.
   1346      0    stevel 		 */
   1347      0    stevel 		if ((uap->flags & MS_GLOBAL) == 0 &&
   1348      0    stevel 		    lookupname(uap->spec, fromspace,
   1349   3912     lling 		    FOLLOW, NULL, &bvp) == 0) {
   1350      0    stevel 			addmip = 1;
   1351      0    stevel 		}
   1352      0    stevel 
   1353      0    stevel 		if ((error = pn_get(uap->dir, fromspace, &pn)) == 0) {
   1354      0    stevel 			pathname_t *pnp;
   1355      0    stevel 
   1356      0    stevel 			if (*pn.pn_path != '/') {
   1357      0    stevel 				error = EINVAL;
   1358      0    stevel 				pn_free(&pn);
   1359      0    stevel 				goto errout;
   1360      0    stevel 			}
   1361      0    stevel 			pn_alloc(&rpn);
   1362      0    stevel 			/*
   1363      0    stevel 			 * Kludge to prevent autofs from deadlocking with
   1364      0    stevel 			 * itself when it calls domount().
   1365      0    stevel 			 *
   1366      0    stevel 			 * If autofs is calling, it is because it is doing
   1367      0    stevel 			 * (autofs) mounts in the process of an NFS mount.  A
   1368      0    stevel 			 * lookuppn() here would cause us to block waiting for
   1369      0    stevel 			 * said NFS mount to complete, which can't since this
   1370      0    stevel 			 * is the thread that was supposed to be doing it.
   1371      0    stevel 			 */
   1372      0    stevel 			if (fromspace == UIO_USERSPACE) {
   1373      0    stevel 				if ((error = lookuppn(&pn, &rpn, FOLLOW, NULL,
   1374      0    stevel 				    NULL)) == 0) {
   1375      0    stevel 					pnp = &rpn;
   1376      0    stevel 				} else {
   1377      0    stevel 					/*
   1378      0    stevel 					 * The file disappeared or otherwise
   1379      0    stevel 					 * became inaccessible since we opened
   1380      0    stevel 					 * it; might as well fail the mount
   1381      0    stevel 					 * since the mount point is no longer
   1382      0    stevel 					 * accessible.
   1383      0    stevel 					 */
   1384      0    stevel 					pn_free(&rpn);
   1385      0    stevel 					pn_free(&pn);
   1386      0    stevel 					goto errout;
   1387      0    stevel 				}
   1388      0    stevel 			} else {
   1389      0    stevel 				pnp = &pn;
   1390      0    stevel 			}
   1391      0    stevel 			mountpt = kmem_alloc(pnp->pn_pathlen + 1, KM_SLEEP);
   1392      0    stevel 			(void) strcpy(mountpt, pnp->pn_path);
   1393      0    stevel 
   1394      0    stevel 			/*
   1395      0    stevel 			 * If the addition of the zone's rootpath
   1396      0    stevel 			 * would push us over a total path length
   1397      0    stevel 			 * of MAXPATHLEN, we fail the mount with
   1398      0    stevel 			 * ENAMETOOLONG, which is what we would have
   1399      0    stevel 			 * gotten if we were trying to perform the same
   1400      0    stevel 			 * mount in the global zone.
   1401      0    stevel 			 *
   1402      0    stevel 			 * strlen() doesn't count the trailing
   1403      0    stevel 			 * '\0', but zone_rootpathlen counts both a
   1404      0    stevel 			 * trailing '/' and the terminating '\0'.
   1405      0    stevel 			 */
   1406      0    stevel 			if ((curproc->p_zone->zone_rootpathlen - 1 +
   1407      0    stevel 			    strlen(mountpt)) > MAXPATHLEN ||
   1408      0    stevel 			    (resource != NULL &&
   1409      0    stevel 			    (curproc->p_zone->zone_rootpathlen - 1 +
   1410      0    stevel 			    strlen(resource)) > MAXPATHLEN)) {
   1411      0    stevel 				error = ENAMETOOLONG;
   1412      0    stevel 			}
   1413      0    stevel 
   1414      0    stevel 			pn_free(&rpn);
   1415      0    stevel 			pn_free(&pn);
   1416      0    stevel 		}
   1417      0    stevel 
   1418      0    stevel 		if (error)
   1419      0    stevel 			goto errout;
   1420      0    stevel 
   1421      0    stevel 		/*
   1422      0    stevel 		 * Prevent path name resolution from proceeding past
   1423      0    stevel 		 * the mount point.
   1424      0    stevel 		 */
   1425      0    stevel 		if (vn_vfswlock(vp) != 0) {
   1426      0    stevel 			error = EBUSY;
   1427      0    stevel 			goto errout;
   1428      0    stevel 		}
   1429      0    stevel 
   1430      0    stevel 		/*
   1431      0    stevel 		 * Verify that it's legitimate to establish a mount on
   1432      0    stevel 		 * the prospective mount point.
   1433      0    stevel 		 */
   1434      0    stevel 		if (vn_mountedvfs(vp) != NULL) {
   1435      0    stevel 			/*
   1436      0    stevel 			 * The mount point lock was obtained after some
   1437      0    stevel 			 * other thread raced through and established a mount.
   1438      0    stevel 			 */
   1439      0    stevel 			vn_vfsunlock(vp);
   1440      0    stevel 			error = EBUSY;
   1441      0    stevel 			goto errout;
   1442      0    stevel 		}
   1443      0    stevel 		if (vp->v_flag & VNOMOUNT) {
   1444      0    stevel 			vn_vfsunlock(vp);
   1445      0    stevel 			error = EINVAL;
   1446      0    stevel 			goto errout;
   1447      0    stevel 		}
   1448      0    stevel 	}
   1449      0    stevel 	if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) {
   1450      0    stevel 		uap->dataptr = NULL;
   1451      0    stevel 		uap->datalen = 0;
   1452      0    stevel 	}
   1453      0    stevel 
   1454      0    stevel 	/*
   1455      0    stevel 	 * If this is a remount, we don't want to create a new VFS.
   1456      0    stevel 	 * Instead, we pass the existing one with a remount flag.
   1457      0    stevel 	 */
   1458      0    stevel 	if (remount) {
   1459      0    stevel 		/*
   1460      0    stevel 		 * Confirm that the mount point is the root vnode of the
   1461      0    stevel 		 * file system that is being remounted.
   1462      0    stevel 		 * This can happen if the user specifies a different
   1463      0    stevel 		 * mount point directory pathname in the (re)mount command.
   1464      0    stevel 		 *
   1465      0    stevel 		 * Code below can only be reached if splice is true, so it's
   1466      0    stevel 		 * safe to do vn_vfsunlock() here.
   1467      0    stevel 		 */
   1468      0    stevel 		if ((vp->v_flag & VROOT) == 0) {
   1469      0    stevel 			vn_vfsunlock(vp);
   1470      0    stevel 			error = ENOENT;
   1471      0    stevel 			goto errout;
   1472      0    stevel 		}
   1473      0    stevel 		/*
   1474      0    stevel 		 * Disallow making file systems read-only unless file system
   1475      0    stevel 		 * explicitly allows it in its vfssw.  Ignore other flags.
   1476      0    stevel 		 */
   1477      0    stevel 		if (rdonly && vn_is_readonly(vp) == 0 &&
   1478      0    stevel 		    (vswp->vsw_flag & VSW_CANRWRO) == 0) {
   1479      0    stevel 			vn_vfsunlock(vp);
   1480      0    stevel 			error = EINVAL;
   1481      0    stevel 			goto errout;
   1482      0    stevel 		}
   1483      0    stevel 		/*
   1484   5331       amw 		 * Disallow changing the NBMAND disposition of the file
   1485   5331       amw 		 * system on remounts.
   1486      0    stevel 		 */
   1487      0    stevel 		if ((nbmand && ((vp->v_vfsp->vfs_flag & VFS_NBMAND) == 0)) ||
   1488      0    stevel 		    (!nbmand && (vp->v_vfsp->vfs_flag & VFS_NBMAND))) {
   1489   5331       amw 			vn_vfsunlock(vp);
   1490   5331       amw 			error = EINVAL;
   1491   5331       amw 			goto errout;
   1492      0    stevel 		}
   1493      0    stevel 		vfsp = vp->v_vfsp;
   1494      0    stevel 		ovflags = vfsp->vfs_flag;
   1495      0    stevel 		vfsp->vfs_flag |= VFS_REMOUNT;
   1496      0    stevel 		vfsp->vfs_flag &= ~VFS_RDONLY;
   1497      0    stevel 	} else {
   1498   5331       amw 		vfsp = vfs_alloc(KM_SLEEP);
   1499      0    stevel 		VFS_INIT(vfsp, vfsops, NULL);
   1500      0    stevel 	}
   1501      0    stevel 
   1502      0    stevel 	VFS_HOLD(vfsp);
   1503      0    stevel 
   1504   6734   johnlev 	if ((error = lofi_add(fsname, vfsp, &mnt_mntopts, uap)) != 0) {
   1505   6734   johnlev 		if (!remount) {
   1506   6734   johnlev 			if (splice)
   1507   6734   johnlev 				vn_vfsunlock(vp);
   1508   6734   johnlev 			vfs_free(vfsp);
   1509   6734   johnlev 		} else {
   1510   6734   johnlev 			vn_vfsunlock(vp);
   1511   6734   johnlev 			VFS_RELE(vfsp);
   1512   6734   johnlev 		}
   1513   6734   johnlev 		goto errout;
   1514   6734   johnlev 	}
   1515   6734   johnlev 
   1516   6734   johnlev 	/*
   1517   6734   johnlev 	 * PRIV_SYS_MOUNT doesn't mean you can become root.
   1518   6734   johnlev 	 */
   1519   6734   johnlev 	if (vfsp->vfs_lofi_minor != 0) {
   1520   6734   johnlev 		uap->flags |= MS_NOSUID;
   1521   6734   johnlev 		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
   1522   6734   johnlev 	}
   1523   6734   johnlev 
   1524      0    stevel 	/*
   1525      0    stevel 	 * The vfs_reflock is not used anymore; the code below explicitly
   1526      0    stevel 	 * holds it, preventing others from accessing it directly.
   1527      0    stevel 	 */
   1528      0    stevel 	if ((sema_tryp(&vfsp->vfs_reflock) == 0) &&
   1529      0    stevel 	    !(vfsp->vfs_flag & VFS_REMOUNT))
   1530      0    stevel 		cmn_err(CE_WARN,
   1531   3446       mrj 		    "mount type %s couldn't get vfs_reflock", vswp->vsw_name);
   1532      0    stevel 
   1533      0    stevel 	/*
   1534      0    stevel 	 * Lock the vfs. If this is a remount we want to avoid spurious umount
   1535      0    stevel 	 * failures that happen as a side-effect of fsflush() and other mount
   1536      0    stevel 	 * and unmount operations that might be going on simultaneously and
   1537      0    stevel 	 * may have locked the vfs currently. To not return EBUSY immediately
   1538      0    stevel 	 * here we use vfs_lock_wait() instead vfs_lock() for the remount case.
   1539      0    stevel 	 */
   1540      0    stevel 	if (!remount) {
   1541      0    stevel 		if (error = vfs_lock(vfsp)) {
   1542      0    stevel 			vfsp->vfs_flag = ovflags;
   1543   6734   johnlev 
   1544   6734   johnlev 			lofi_remove(vfsp);
   1545   6734   johnlev 
   1546      0    stevel 			if (splice)
   1547      0    stevel 				vn_vfsunlock(vp);
   1548   5331       amw 			vfs_free(vfsp);
   1549      0    stevel 			goto errout;
   1550      0    stevel 		}
   1551      0    stevel 	} else {
   1552      0    stevel 		vfs_lock_wait(vfsp);
   1553      0    stevel 	}
   1554      0    stevel 
   1555      0    stevel 	/*
   1556      0    stevel 	 * Add device to mount in progress table, global mounts require special
   1557      0    stevel 	 * handling. It is possible that we have already done the lookupname
   1558      0    stevel 	 * on a spliced, non-global fs. If so, we don't want to do it again
   1559      0    stevel 	 * since we cannot do a lookupname after taking the
   1560      0    stevel 	 * wlock above. This case is for a non-spliced, non-global filesystem.
   1561      0    stevel 	 */
   1562      0    stevel 	if (!addmip) {
   1563   3912     lling 		if ((uap->flags & MS_GLOBAL) == 0 &&
   1564   3912     lling 		    lookupname(uap->spec, fromspace, FOLLOW, NULL, &bvp) == 0) {
   1565      0    stevel 			addmip = 1;
   1566      0    stevel 		}
   1567      0    stevel 	}
   1568      0    stevel 
   1569      0    stevel 	if (addmip) {
   1570   6734   johnlev 		vnode_t *lvp = NULL;
   1571   6734   johnlev 
   1572   6734   johnlev 		error = vfs_get_lofi(vfsp, &lvp);
   1573   6734   johnlev 		if (error > 0) {
   1574   6734   johnlev 			lofi_remove(vfsp);
   1575   6734   johnlev 
   1576   6734   johnlev 			if (splice)
   1577   6734   johnlev 				vn_vfsunlock(vp);
   1578   6734   johnlev 			vfs_unlock(vfsp);
   1579   6734   johnlev 
   1580   6734   johnlev 			if (remount) {
   1581   6734   johnlev 				VFS_RELE(vfsp);
   1582   6734   johnlev 			} else {
   1583   6734   johnlev 				vfs_free(vfsp);
   1584   6734   johnlev 			}
   1585   6734   johnlev 
   1586   6734   johnlev 			goto errout;
   1587   6734   johnlev 		} else if (error == -1) {
   1588   6734   johnlev 			bdev = bvp->v_rdev;
   1589   6734   johnlev 			VN_RELE(bvp);
   1590   6734   johnlev 		} else {
   1591   6734   johnlev 			bdev = lvp->v_rdev;
   1592   6734   johnlev 			VN_RELE(lvp);
   1593   6734   johnlev 			VN_RELE(bvp);
   1594   6734   johnlev 		}
   1595   6734   johnlev 
   1596      0    stevel 		vfs_addmip(bdev, vfsp);
   1597      0    stevel 		addmip = 0;
   1598      0    stevel 		delmip = 1;
   1599      0    stevel 	}
   1600      0    stevel 	/*
   1601      0    stevel 	 * Invalidate cached entry for the mount point.
   1602      0    stevel 	 */
   1603      0    stevel 	if (splice)
   1604      0    stevel 		dnlc_purge_vp(vp);
   1605      0    stevel 
   1606      0    stevel 	/*
   1607      0    stevel 	 * If have an option string but the filesystem doesn't supply a
   1608      0    stevel 	 * prototype options table, create a table with the global
   1609      0    stevel 	 * options and sufficient room to accept all the options in the
   1610      0    stevel 	 * string.  Then parse the passed in option string
   1611      0    stevel 	 * accepting all the options in the string.  This gives us an
   1612      0    stevel 	 * option table with all the proper cancel properties for the
   1613      0    stevel 	 * global options.
   1614      0    stevel 	 *
   1615      0    stevel 	 * Filesystems that supply a prototype options table are handled
   1616      0    stevel 	 * earlier in this function.
   1617      0    stevel 	 */
   1618      0    stevel 	if (uap->flags & MS_OPTIONSTR) {
   1619      0    stevel 		if (!(vswp->vsw_flag & VSW_HASPROTO)) {
   1620      0    stevel 			mntopts_t tmp_mntopts;
   1621      0    stevel 
   1622      0    stevel 			tmp_mntopts.mo_count = 0;
   1623      0    stevel 			vfs_createopttbl_extend(&tmp_mntopts, inargs,
   1624      0    stevel 			    &mnt_mntopts);
   1625      0    stevel 			vfs_parsemntopts(&tmp_mntopts, inargs, 1);
   1626      0    stevel 			vfs_swapopttbl_nolock(&mnt_mntopts, &tmp_mntopts);
   1627      0    stevel 			vfs_freeopttbl(&tmp_mntopts);
   1628      0    stevel 		}
   1629      0    stevel 	}
   1630      0    stevel 
   1631      0    stevel 	/*
   1632      0    stevel 	 * Serialize with zone creations.
   1633      0    stevel 	 */
   1634      0    stevel 	mount_in_progress();
   1635      0    stevel 	/*
   1636      0    stevel 	 * Instantiate (or reinstantiate) the file system.  If appropriate,
   1637      0    stevel 	 * splice it into the file system name space.
   1638      0    stevel 	 *
   1639      0    stevel 	 * We want VFS_MOUNT() to be able to override the vfs_resource
   1640      0    stevel 	 * string if necessary (ie, mntfs), and also for a remount to
   1641      0    stevel 	 * change the same (necessary when remounting '/' during boot).
   1642      0    stevel 	 * So we set up vfs_mntpt and vfs_resource to what we think they
   1643      0    stevel 	 * should be, then hand off control to VFS_MOUNT() which can
   1644      0    stevel 	 * override this.
   1645      0    stevel 	 *
   1646      0    stevel 	 * For safety's sake, when changing vfs_resource or vfs_mntpt of
   1647      0    stevel 	 * a vfs which is on the vfs list (i.e. during a remount), we must
   1648      0    stevel 	 * never set those fields to NULL. Several bits of code make
   1649      0    stevel 	 * assumptions that the fields are always valid.
   1650      0    stevel 	 */
   1651      0    stevel 	vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
   1652      0    stevel 	if (remount) {
   1653      0    stevel 		if ((oldresource = vfsp->vfs_resource) != NULL)
   1654      0    stevel 			refstr_hold(oldresource);
   1655      0    stevel 		if ((oldmntpt = vfsp->vfs_mntpt) != NULL)
   1656      0    stevel 			refstr_hold(oldmntpt);
   1657      0    stevel 	}
   1658      0    stevel 	vfs_setresource(vfsp, resource);
   1659      0    stevel 	vfs_setmntpoint(vfsp, mountpt);
   1660      0    stevel 
   1661   4863     praks 	/*
   1662   4863     praks 	 * going to mount on this vnode, so notify.
   1663   4863     praks 	 */
   1664   5331       amw 	vnevent_mountedover(vp, NULL);
   1665      0    stevel 	error = VFS_MOUNT(vfsp, vp, uap, credp);
   1666      0    stevel 
   1667      0    stevel 	if (uap->flags & MS_RDONLY)
   1668      0    stevel 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
   1669      0    stevel 	if (uap->flags & MS_NOSUID)
   1670      0    stevel 		vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0);
   1671      0    stevel 	if (uap->flags & MS_GLOBAL)
   1672      0    stevel 		vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0);
   1673      0    stevel 
   1674      0    stevel 	if (error) {
   1675   6734   johnlev 		lofi_remove(vfsp);
   1676   6734   johnlev 
   1677      0    stevel 		if (remount) {
   1678      0    stevel 			/* put back pre-remount options */
   1679      0    stevel 			vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
   1680    994     evanl 			vfs_setmntpoint(vfsp, (stripzonepath(
   1681   3912     lling 			    refstr_value(oldmntpt))));
   1682      0    stevel 			if (oldmntpt)
   1683      0    stevel 				refstr_rele(oldmntpt);
   1684    994     evanl 			vfs_setresource(vfsp, (stripzonepath(
   1685   3912     lling 			    refstr_value(oldresource))));
   1686      0    stevel 			if (oldresource)
   1687      0    stevel 				refstr_rele(oldresource);
   1688      0    stevel 			vfsp->vfs_flag = ovflags;
   1689      0    stevel 			vfs_unlock(vfsp);
   1690      0    stevel 			VFS_RELE(vfsp);
   1691      0    stevel 		} else {
   1692      0    stevel 			vfs_unlock(vfsp);
   1693      0    stevel 			vfs_freemnttab(vfsp);
   1694   5331       amw 			vfs_free(vfsp);
   1695      0    stevel 		}
   1696      0    stevel 	} else {
   1697      0    stevel 		/*
   1698      0    stevel 		 * Set the mount time to now
   1699      0    stevel 		 */
   1700      0    stevel 		vfsp->vfs_mtime = ddi_get_time();
   1701      0    stevel 		if (remount) {
   1702      0    stevel 			vfsp->vfs_flag &= ~VFS_REMOUNT;
   1703      0    stevel 			if (oldresource)
   1704      0    stevel 				refstr_rele(oldresource);
   1705      0    stevel 			if (oldmntpt)
   1706      0    stevel 				refstr_rele(oldmntpt);
   1707      0    stevel 		} else if (splice) {
   1708      0    stevel 			/*
   1709      0    stevel 			 * Link vfsp into the name space at the mount
   1710      0    stevel 			 * point. Vfs_add() is responsible for
   1711      0    stevel 			 * holding the mount point which will be
   1712      0    stevel 			 * released when vfs_remove() is called.
   1713      0    stevel 			 */
   1714      0    stevel 			vfs_add(vp, vfsp, uap->flags);
   1715      0    stevel 		} else {
   1716      0    stevel 			/*
   1717      0    stevel 			 * Hold the reference to file system which is
   1718      0    stevel 			 * not linked into the name space.
   1719      0    stevel 			 */
   1720      0    stevel 			vfsp->vfs_zone = NULL;
   1721      0    stevel 			VFS_HOLD(vfsp);
   1722      0    stevel 			vfsp->vfs_vnodecovered = NULL;
   1723      0    stevel 		}
   1724      0    stevel 		/*
   1725      0    stevel 		 * Set flags for global options encountered
   1726      0    stevel 		 */
   1727      0    stevel 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
   1728      0    stevel 			vfsp->vfs_flag |= VFS_RDONLY;
   1729      0    stevel 		else
   1730      0    stevel 			vfsp->vfs_flag &= ~VFS_RDONLY;
   1731      0    stevel 		if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
   1732      0    stevel 			vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES);
   1733      0    stevel 		} else {
   1734      0    stevel 			if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
   1735      0    stevel 				vfsp->vfs_flag |= VFS_NODEVICES;
   1736      0    stevel 			else
   1737      0    stevel 				vfsp->vfs_flag &= ~VFS_NODEVICES;
   1738      0    stevel 			if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
   1739      0    stevel 				vfsp->vfs_flag |= VFS_NOSETUID;
   1740      0    stevel 			else
   1741      0    stevel 				vfsp->vfs_flag &= ~VFS_NOSETUID;
   1742      0    stevel 		}
   1743      0    stevel 		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
   1744      0    stevel 			vfsp->vfs_flag |= VFS_NBMAND;
   1745      0    stevel 		else
   1746      0    stevel 			vfsp->vfs_flag &= ~VFS_NBMAND;
   1747      0    stevel 
   1748      0    stevel 		if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
   1749      0    stevel 			vfsp->vfs_flag |= VFS_XATTR;
   1750      0    stevel 		else
   1751      0    stevel 			vfsp->vfs_flag &= ~VFS_XATTR;
   1752      0    stevel 
   1753      0    stevel 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
   1754      0    stevel 			vfsp->vfs_flag |= VFS_NOEXEC;
   1755      0    stevel 		else
   1756      0    stevel 			vfsp->vfs_flag &= ~VFS_NOEXEC;
   1757      0    stevel 
   1758      0    stevel 		/*
   1759      0    stevel 		 * Now construct the output option string of options
   1760      0    stevel 		 * we recognized.
   1761      0    stevel 		 */
   1762      0    stevel 		if (uap->flags & MS_OPTIONSTR) {
   1763      0    stevel 			vfs_list_read_lock();
   1764      0    stevel 			copyout_error = vfs_buildoptionstr(
   1765   3912     lling 			    &vfsp->vfs_mntopts, inargs, optlen);
   1766      0    stevel 			vfs_list_unlock();
   1767      0    stevel 			if (copyout_error == 0 &&
   1768      0    stevel 			    (uap->flags & MS_SYSSPACE) == 0) {
   1769      0    stevel 				copyout_error = copyoutstr(inargs, opts,
   1770      0    stevel 				    optlen, NULL);
   1771      0    stevel 			}
   1772      0    stevel 		}
   1773   1488       rsb 
   1774   1520       rsb 		/*
   1775   1520       rsb 		 * If this isn't a remount, set up the vopstats before
   1776   1678       rsb 		 * anyone can touch this. We only allow spliced file
   1777   1678       rsb 		 * systems (file systems which are in the namespace) to
   1778   1678       rsb 		 * have the VFS_STATS flag set.
   1779   1678       rsb 		 * NOTE: PxFS mounts the underlying file system with
   1780   1678       rsb 		 * MS_NOSPLICE set and copies those vfs_flags to its private
   1781   1678       rsb 		 * vfs structure. As a result, PxFS should never have
   1782   1678       rsb 		 * the VFS_STATS flag or else we might access the vfs
   1783   1678       rsb 		 * statistics-related fields prior to them being
   1784   1678       rsb 		 * properly initialized.
   1785   1678       rsb 		 */
   1786   1678       rsb 		if (!remount && (vswp->vsw_flag & VSW_STATS) && splice) {
   1787   1520       rsb 			initialize_vopstats(&vfsp->vfs_vopstats);
   1788   1520       rsb 			/*
   1789   1520       rsb 			 * We need to set vfs_vskap to NULL because there's
   1790   1520       rsb 			 * a chance it won't be set below.  This is checked
   1791   1520       rsb 			 * in teardown_vopstats() so we can't have garbage.
   1792   1520       rsb 			 */
   1793   1520       rsb 			vfsp->vfs_vskap = NULL;
   1794   1488       rsb 			vfsp->vfs_flag |= VFS_STATS;
   1795   1520       rsb 			vfsp->vfs_fstypevsp = get_fstype_vopstats(vfsp, vswp);
   1796   1488       rsb 		}
   1797   1488       rsb 
   1798   4321    casper 		if (vswp->vsw_flag & VSW_XID)
   1799   4321    casper 			vfsp->vfs_flag |= VFS_XID;
   1800   4321    casper 
   1801      0    stevel 		vfs_unlock(vfsp);
   1802      0    stevel 	}
   1803      0    stevel 	mount_completed();
   1804      0    stevel 	if (splice)
   1805      0    stevel 		vn_vfsunlock(vp);
   1806      0    stevel 
   1807      0    stevel 	if ((error == 0) && (copyout_error == 0)) {
   1808   1520       rsb 		if (!remount) {
   1809   1520       rsb 			/*
   1810   1520       rsb 			 * Don't call get_vskstat_anchor() while holding
   1811   1520       rsb 			 * locks since it allocates memory and calls
   1812   1520       rsb 			 * VFS_STATVFS().  For NFS, the latter can generate
   1813   1520       rsb 			 * an over-the-wire call.
   1814   1520       rsb 			 */
   1815   1520       rsb 			vskap = get_vskstat_anchor(vfsp);
   1816   1520       rsb 			/* Only take the lock if we have something to do */
   1817   1520       rsb 			if (vskap != NULL) {
   1818   1520       rsb 				vfs_lock_wait(vfsp);
   1819   1520       rsb 				if (vfsp->vfs_flag & VFS_STATS) {
   1820   1520       rsb 					vfsp->vfs_vskap = vskap;
   1821   1520       rsb 				}
   1822   1520       rsb 				vfs_unlock(vfsp);
   1823   1520       rsb 			}
   1824   1520       rsb 		}
   1825   1488       rsb 		/* Return vfsp to caller. */
   1826      0    stevel 		*vfspp = vfsp;
   1827      0    stevel 	}
   1828      0    stevel errout:
   1829      0    stevel 	vfs_freeopttbl(&mnt_mntopts);
   1830      0    stevel 	if (resource != NULL)
   1831      0    stevel 		kmem_free(resource, strlen(resource) + 1);
   1832      0    stevel 	if (mountpt != NULL)
   1833      0    stevel 		kmem_free(mountpt, strlen(mountpt) + 1);
   1834      0    stevel 	/*
   1835      0    stevel 	 * It is possible we errored prior to adding to mount in progress
   1836      0    stevel 	 * table. Must free vnode we acquired with successful lookupname.
   1837      0    stevel 	 */
   1838      0    stevel 	if (addmip)
   1839      0    stevel 		VN_RELE(bvp);
   1840      0    stevel 	if (delmip)
   1841      0    stevel 		vfs_delmip(vfsp);
   1842      0    stevel 	ASSERT(vswp != NULL);
   1843      0    stevel 	vfs_unrefvfssw(vswp);
   1844      0    stevel 	if (inargs != opts)
   1845      0    stevel 		kmem_free(inargs, MAX_MNTOPT_STR);
   1846      0    stevel 	if (copyout_error) {
   1847   6734   johnlev 		lofi_remove(vfsp);
   1848      0    stevel 		VFS_RELE(vfsp);
   1849      0    stevel 		error = copyout_error;
   1850      0    stevel 	}
   1851      0    stevel 	return (error);
   1852      0    stevel }
   1853      0    stevel 
   1854      0    stevel static void
   1855      0    stevel vfs_setpath(struct vfs *vfsp, refstr_t **refp, const char *newpath)
   1856      0    stevel {
   1857      0    stevel 	size_t len;
   1858      0    stevel 	refstr_t *ref;
   1859      0    stevel 	zone_t *zone = curproc->p_zone;
   1860      0    stevel 	char *sp;
   1861      0    stevel 	int have_list_lock = 0;
   1862      0    stevel 
   1863      0    stevel 	ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp));
   1864      0    stevel 
   1865      0    stevel 	/*
   1866      0    stevel 	 * New path must be less than MAXPATHLEN because mntfs
   1867      0    stevel 	 * will only display up to MAXPATHLEN bytes. This is currently
   1868      0    stevel 	 * safe, because domount() uses pn_get(), and other callers
   1869      0    stevel 	 * similarly cap the size to fewer than MAXPATHLEN bytes.
   1870      0    stevel 	 */
   1871      0    stevel 
   1872      0    stevel 	ASSERT(strlen(newpath) < MAXPATHLEN);
   1873      0    stevel 
   1874      0    stevel 	/* mntfs requires consistency while vfs list lock is held */
   1875      0    stevel 
   1876      0    stevel 	if (VFS_ON_LIST(vfsp)) {
   1877      0    stevel 		have_list_lock = 1;
   1878      0    stevel 		vfs_list_lock();
   1879      0    stevel 	}
   1880      0    stevel 
   1881      0    stevel 	if (*refp != NULL)
   1882      0    stevel 		refstr_rele(*refp);
   1883      0    stevel 
   1884      0    stevel 	/* Do we need to modify the path? */
   1885      0    stevel 
   1886      0    stevel 	if (zone == global_zone || *newpath != '/') {
   1887      0    stevel 		ref = refstr_alloc(newpath);
   1888      0    stevel 		goto out;
   1889      0    stevel 	}
   1890      0    stevel 
   1891      0    stevel 	/*
   1892      0    stevel 	 * Truncate the trailing '/' in the zoneroot, and merge
   1893      0    stevel 	 * in the zone's rootpath with the "newpath" (resource
   1894      0    stevel 	 * or mountpoint) passed in.
   1895      0    stevel 	 *
   1896      0    stevel 	 * The size of the required buffer is thus the size of
   1897      0    stevel 	 * the buffer required for the passed-in newpath
   1898      0    stevel 	 * (strlen(newpath) + 1), plus the size of the buffer
   1899      0    stevel 	 * required to hold zone_rootpath (zone_rootpathlen)
   1900      0    stevel 	 * minus one for one of the now-superfluous NUL
   1901      0    stevel 	 * terminations, minus one for the trailing '/'.
   1902      0    stevel 	 *
   1903      0    stevel 	 * That gives us:
   1904      0    stevel 	 *
   1905      0    stevel 	 * (strlen(newpath) + 1) + zone_rootpathlen - 1 - 1
   1906      0    stevel 	 *
   1907      0    stevel 	 * Which is what we have below.
   1908      0    stevel 	 */
   1909      0    stevel 
   1910      0    stevel 	len = strlen(newpath) + zone->zone_rootpathlen - 1;
   1911      0    stevel 	sp = kmem_alloc(len, KM_SLEEP);
   1912      0    stevel 
   1913      0    stevel 	/*
   1914      0    stevel 	 * Copy everything including the trailing slash, which
   1915      0    stevel 	 * we then overwrite with the NUL character.
   1916      0    stevel 	 */
   1917      0    stevel 
   1918      0    stevel 	(void) strcpy(sp, zone->zone_rootpath);
   1919      0    stevel 	sp[zone->zone_rootpathlen - 2] = '\0';
   1920      0    stevel 	(void) strcat(sp, newpath);
   1921      0    stevel 
   1922      0    stevel 	ref = refstr_alloc(sp);
   1923      0    stevel 	kmem_free(sp, len);
   1924      0    stevel out:
   1925      0    stevel 	*refp = ref;
   1926      0    stevel 
   1927      0    stevel 	if (have_list_lock) {
   1928      0    stevel 		vfs_mnttab_modtimeupd();
   1929      0    stevel 		vfs_list_unlock();
   1930      0    stevel 	}
   1931      0    stevel }
   1932      0    stevel 
   1933      0    stevel /*
   1934      0    stevel  * Record a mounted resource name in a vfs structure.
   1935      0    stevel  * If vfsp is already mounted, caller must hold the vfs lock.
   1936      0    stevel  */
   1937      0    stevel void
   1938      0    stevel vfs_setresource(struct vfs *vfsp, const char *resource)
   1939      0    stevel {
   1940      0    stevel 	if (resource == NULL || resource[0] == '\0')
   1941      0    stevel 		resource = VFS_NORESOURCE;
   1942      0    stevel 	vfs_setpath(vfsp, &vfsp->vfs_resource, resource);
   1943      0    stevel }
   1944      0    stevel 
   1945      0    stevel /*
   1946      0    stevel  * Record a mount point name in a vfs structure.
   1947      0    stevel  * If vfsp is already mounted, caller must hold the vfs lock.
   1948      0    stevel  */
   1949      0    stevel void
   1950      0    stevel vfs_setmntpoint(struct vfs *vfsp, const char *mntpt)
   1951      0    stevel {
   1952      0    stevel 	if (mntpt == NULL || mntpt[0] == '\0')
   1953      0    stevel 		mntpt = VFS_NOMNTPT;
   1954      0    stevel 	vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt);
   1955      0    stevel }
   1956      0    stevel 
   1957      0    stevel /* Returns the vfs_resource. Caller must call refstr_rele() when finished. */
   1958      0    stevel 
   1959      0    stevel refstr_t *
   1960      0    stevel vfs_getresource(const struct vfs *vfsp)
   1961      0    stevel {
   1962      0    stevel 	refstr_t *resource;
   1963      0    stevel 
   1964      0    stevel 	vfs_list_read_lock();
   1965      0    stevel 	resource = vfsp->vfs_resource;
   1966      0    stevel 	refstr_hold(resource);
   1967      0    stevel 	vfs_list_unlock();
   1968      0    stevel 
   1969      0    stevel 	return (resource);
   1970      0    stevel }
   1971      0    stevel 
   1972      0    stevel /* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. */
   1973      0    stevel 
   1974      0    stevel refstr_t *
   1975      0    stevel vfs_getmntpoint(const struct vfs *vfsp)
   1976      0    stevel {
   1977      0    stevel 	refstr_t *mntpt;
   1978      0    stevel 
   1979      0    stevel 	vfs_list_read_lock();
   1980      0    stevel 	mntpt = vfsp->vfs_mntpt;
   1981      0    stevel 	refstr_hold(mntpt);
   1982      0    stevel 	vfs_list_unlock();
   1983      0    stevel 
   1984      0    stevel 	return (mntpt);
   1985      0    stevel }
   1986      0    stevel 
   1987      0    stevel /*
   1988      0    stevel  * Create an empty options table with enough empty slots to hold all
   1989      0    stevel  * The options in the options string passed as an argument.
   1990      0    stevel  * Potentially prepend another options table.
   1991      0    stevel  *
   1992      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   1993      0    stevel  *       to protect mops.
   1994      0    stevel  */
   1995      0    stevel static void
   1996      0    stevel vfs_createopttbl_extend(mntopts_t *mops, const char *opts,
   1997      0    stevel     const mntopts_t *mtmpl)
   1998      0    stevel {
   1999      0    stevel 	const char *s = opts;
   2000      0    stevel 	uint_t count;
   2001      0    stevel 
   2002      0    stevel 	if (opts == NULL || *opts == '\0') {
   2003      0    stevel 		count = 0;
   2004      0    stevel 	} else {
   2005      0    stevel 		count = 1;
   2006      0    stevel 
   2007      0    stevel 		/*
   2008      0    stevel 		 * Count number of options in the string
   2009      0    stevel 		 */
   2010      0    stevel 		for (s = strchr(s, ','); s != NULL; s = strchr(s, ',')) {
   2011      0    stevel 			count++;
   2012      0    stevel 			s++;
   2013      0    stevel 		}
   2014      0    stevel 	}
   2015      0    stevel 	vfs_copyopttbl_extend(mtmpl, mops, count);
   2016      0    stevel }
   2017      0    stevel 
   2018      0    stevel /*
   2019      0    stevel  * Create an empty options table with enough empty slots to hold all
   2020      0    stevel  * The options in the options string passed as an argument.
   2021      0    stevel  *
   2022      0    stevel  * This function is *not* for general use by filesystems.
   2023      0    stevel  *
   2024      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2025      0    stevel  *       to protect mops.
   2026      0    stevel  */
   2027      0    stevel void
   2028      0    stevel vfs_createopttbl(mntopts_t *mops, const char *opts)
   2029      0    stevel {
   2030      0    stevel 	vfs_createopttbl_extend(mops, opts, NULL);
   2031      0    stevel }
   2032      0    stevel 
   2033      0    stevel 
   2034      0    stevel /*
   2035      0    stevel  * Swap two mount options tables
   2036      0    stevel  */
   2037      0    stevel static void
   2038      0    stevel vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2)
   2039      0    stevel {
   2040      0    stevel 	uint_t tmpcnt;
   2041      0    stevel 	mntopt_t *tmplist;
   2042      0    stevel 
   2043      0    stevel 	tmpcnt = optbl2->mo_count;
   2044      0    stevel 	tmplist = optbl2->mo_list;
   2045      0    stevel 	optbl2->mo_count = optbl1->mo_count;
   2046      0    stevel 	optbl2->mo_list = optbl1->mo_list;
   2047      0    stevel 	optbl1->mo_count = tmpcnt;
   2048      0    stevel 	optbl1->mo_list = tmplist;
   2049      0    stevel }
   2050      0    stevel 
   2051      0    stevel static void
   2052      0    stevel vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2)
   2053      0    stevel {
   2054      0    stevel 	vfs_list_lock();
   2055      0    stevel 	vfs_swapopttbl_nolock(optbl1, optbl2);
   2056      0    stevel 	vfs_mnttab_modtimeupd();
   2057      0    stevel 	vfs_list_unlock();
   2058      0    stevel }
   2059      0    stevel 
   2060      0    stevel static char **
   2061      0    stevel vfs_copycancelopt_extend(char **const moc, int extend)
   2062      0    stevel {
   2063      0    stevel 	int i = 0;
   2064      0    stevel 	int j;
   2065      0    stevel 	char **result;
   2066      0    stevel 
   2067      0    stevel 	if (moc != NULL) {
   2068      0    stevel 		for (; moc[i] != NULL; i++)
   2069      0    stevel 			/* count number of options to cancel */;
   2070      0    stevel 	}
   2071      0    stevel 
   2072      0    stevel 	if (i + extend == 0)
   2073      0    stevel 		return (NULL);
   2074      0    stevel 
   2075      0    stevel 	result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP);
   2076      0    stevel 
   2077      0    stevel 	for (j = 0; j < i; j++) {
   2078      0    stevel 		result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP);
   2079      0    stevel 		(void) strcpy(result[j], moc[j]);
   2080      0    stevel 	}
   2081      0    stevel 	for (; j <= i + extend; j++)
   2082      0    stevel 		result[j] = NULL;
   2083      0    stevel 
   2084      0    stevel 	return (result);
   2085      0    stevel }
   2086      0    stevel 
   2087      0    stevel static void
   2088      0    stevel vfs_copyopt(const mntopt_t *s, mntopt_t *d)
   2089      0    stevel {
   2090      0    stevel 	char *sp, *dp;
   2091      0    stevel 
   2092      0    stevel 	d->mo_flags = s->mo_flags;
   2093      0    stevel 	d->mo_data = s->mo_data;
   2094      0    stevel 	sp = s->mo_name;
   2095      0    stevel 	if (sp != NULL) {
   2096      0    stevel 		dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP);
   2097      0    stevel 		(void) strcpy(dp, sp);
   2098      0    stevel 		d->mo_name = dp;
   2099      0    stevel 	} else {
   2100      0    stevel 		d->mo_name = NULL; /* should never happen */
   2101      0    stevel 	}
   2102      0    stevel 
   2103      0    stevel 	d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0);
   2104      0    stevel 
   2105      0    stevel 	sp = s->mo_arg;
   2106      0    stevel 	if (sp != NULL) {
   2107      0    stevel 		dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP);
   2108      0    stevel 		(void) strcpy(dp, sp);
   2109      0    stevel 		d->mo_arg = dp;
   2110      0    stevel 	} else {
   2111      0    stevel 		d->mo_arg = NULL;
   2112      0    stevel 	}
   2113      0    stevel }
   2114      0    stevel 
   2115      0    stevel /*
   2116      0    stevel  * Copy a mount options table, possibly allocating some spare
   2117      0    stevel  * slots at the end.  It is permissible to copy_extend the NULL table.
   2118      0    stevel  */
   2119      0    stevel static void
   2120      0    stevel vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra)
   2121      0    stevel {
   2122      0    stevel 	uint_t i, count;
   2123      0    stevel 	mntopt_t *motbl;
   2124      0    stevel 
   2125      0    stevel 	/*
   2126      0    stevel 	 * Clear out any existing stuff in the options table being initialized
   2127      0    stevel 	 */
   2128      0    stevel 	vfs_freeopttbl(dmo);
   2129      0    stevel 	count = (smo == NULL) ? 0 : smo->mo_count;
   2130      0    stevel 	if ((count + extra) == 0)	/* nothing to do */
   2131      0    stevel 		return;
   2132      0    stevel 	dmo->mo_count = count + extra;
   2133      0    stevel 	motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP);
   2134      0    stevel 	dmo->mo_list = motbl;
   2135      0    stevel 	for (i = 0; i < count; i++) {
   2136      0    stevel 		vfs_copyopt(&smo->mo_list[i], &motbl[i]);
   2137      0    stevel 	}
   2138      0    stevel 	for (i = count; i < count + extra; i++) {
   2139      0    stevel 		motbl[i].mo_flags = MO_EMPTY;
   2140      0    stevel 	}
   2141      0    stevel }
   2142      0    stevel 
   2143      0    stevel /*
   2144      0    stevel  * Copy a mount options table.
   2145      0    stevel  *
   2146      0    stevel  * This function is *not* for general use by filesystems.
   2147      0    stevel  *
   2148      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2149      0    stevel  *       to protect smo and dmo.
   2150      0    stevel  */
   2151      0    stevel void
   2152      0    stevel vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo)
   2153      0    stevel {
   2154      0    stevel 	vfs_copyopttbl_extend(smo, dmo, 0);
   2155      0    stevel }
   2156      0    stevel 
   2157      0    stevel static char **
   2158      0    stevel vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2)
   2159      0    stevel {
   2160      0    stevel 	int c1 = 0;
   2161      0    stevel 	int c2 = 0;
   2162      0    stevel 	char **result;
   2163      0    stevel 	char **sp1, **sp2, **dp;
   2164      0    stevel 
   2165      0    stevel 	/*
   2166      0    stevel 	 * First we count both lists of cancel options.
   2167      0    stevel 	 * If either is NULL or has no elements, we return a copy of
   2168      0    stevel 	 * the other.
   2169      0    stevel 	 */
   2170      0    stevel 	if (mop1->mo_cancel != NULL) {
   2171      0    stevel 		for (; mop1->mo_cancel[c1] != NULL; c1++)
   2172      0    stevel 			/* count cancel options in mop1 */;
   2173      0    stevel 	}
   2174      0    stevel 
   2175      0    stevel 	if (c1 == 0)
   2176      0    stevel 		return (vfs_copycancelopt_extend(mop2->mo_cancel, 0));
   2177      0    stevel 
   2178      0    stevel 	if (mop2->mo_cancel != NULL) {
   2179      0    stevel 		for (; mop2->mo_cancel[c2] != NULL; c2++)
   2180      0    stevel 			/* count cancel options in mop2 */;
   2181      0    stevel 	}
   2182      0    stevel 
   2183      0    stevel 	result = vfs_copycancelopt_extend(mop1->mo_cancel, c2);
   2184      0    stevel 
   2185      0    stevel 	if (c2 == 0)
   2186      0    stevel 		return (result);
   2187      0    stevel 
   2188      0    stevel 	/*
   2189      0    stevel 	 * When we get here, we've got two sets of cancel options;
   2190      0    stevel 	 * we need to merge the two sets.  We know that the result
   2191      0    stevel 	 * array has "c1+c2+1" entries and in the end we might shrink
   2192      0    stevel 	 * it.
   2193      0    stevel 	 * Result now has a copy of the c1 entries from mop1; we'll
   2194      0    stevel 	 * now lookup all the entries of mop2 in mop1 and copy it if
   2195      0    stevel 	 * it is unique.
   2196      0    stevel 	 * This operation is O(n^2) but it's only called once per
   2197      0    stevel 	 * filesystem per duplicate option.  This is a situation
   2198      0    stevel 	 * which doesn't arise with the filesystems in ON and
   2199      0    stevel 	 * n is generally 1.
   2200      0    stevel 	 */
   2201      0    stevel 
   2202      0    stevel 	dp = &result[c1];
   2203      0    stevel 	for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) {
   2204      0    stevel 		for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) {
   2205      0    stevel 			if (strcmp(*sp1, *sp2) == 0)
   2206      0    stevel 				break;
   2207      0    stevel 		}
   2208      0    stevel 		if (*sp1 == NULL) {
   2209      0    stevel 			/*
   2210      0    stevel 			 * Option *sp2 not found in mop1, so copy it.
   2211      0    stevel 			 * The calls to vfs_copycancelopt_extend()
   2212      0    stevel 			 * guarantee that there's enough room.
   2213      0    stevel 			 */
   2214      0    stevel 			*dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP);
   2215      0    stevel 			(void) strcpy(*dp++, *sp2);
   2216      0    stevel 		}
   2217      0    stevel 	}
   2218      0    stevel 	if (dp != &result[c1+c2]) {
   2219      0    stevel 		size_t bytes = (dp - result + 1) * sizeof (char *);
   2220      0    stevel 		char **nres = kmem_alloc(bytes, KM_SLEEP);
   2221      0    stevel 
   2222      0    stevel 		bcopy(result, nres, bytes);
   2223      0    stevel 		kmem_free(result, (c1 + c2 + 1) * sizeof (char *));
   2224      0    stevel 		result = nres;
   2225      0    stevel 	}
   2226      0    stevel 	return (result);
   2227      0    stevel }
   2228      0    stevel 
   2229      0    stevel /*
   2230      0    stevel  * Merge two mount option tables (outer and inner) into one.  This is very
   2231      0    stevel  * similar to "merging" global variables and automatic variables in C.
   2232      0    stevel  *
   2233      0    stevel  * This isn't (and doesn't have to be) fast.
   2234      0    stevel  *
   2235      0    stevel  * This function is *not* for general use by filesystems.
   2236      0    stevel  *
   2237      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2238      0    stevel  *       to protect omo, imo & dmo.
   2239      0    stevel  */
   2240      0    stevel void
   2241      0    stevel vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo)
   2242      0    stevel {
   2243      0    stevel 	uint_t i, count;
   2244      0    stevel 	mntopt_t *mop, *motbl;
   2245      0    stevel 	uint_t freeidx;
   2246      0    stevel 
   2247      0    stevel 	/*
   2248      0    stevel 	 * First determine how much space we need to allocate.
   2249      0    stevel 	 */
   2250      0    stevel 	count = omo->mo_count;
   2251      0    stevel 	for (i = 0; i < imo->mo_count; i++) {
   2252      0    stevel 		if (imo->mo_list[i].mo_flags & MO_EMPTY)
   2253      0    stevel 			continue;
   2254      0    stevel 		if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL)
   2255      0    stevel 			count++;
   2256      0    stevel 	}
   2257      0    stevel 	ASSERT(count >= omo->mo_count &&
   2258      0    stevel 	    count <= omo->mo_count + imo->mo_count);
   2259      0    stevel 	motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP);
   2260      0    stevel 	for (i = 0; i < omo->mo_count; i++)
   2261      0    stevel 		vfs_copyopt(&omo->mo_list[i], &motbl[i]);
   2262      0    stevel 	freeidx = omo->mo_count;
   2263      0    stevel 	for (i = 0; i < imo->mo_count; i++) {
   2264      0    stevel 		if (imo->mo_list[i].mo_flags & MO_EMPTY)
   2265      0    stevel 			continue;
   2266      0    stevel 		if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) {
   2267      0    stevel 			char **newcanp;
   2268      0    stevel 			uint_t index = mop - omo->mo_list;
   2269      0    stevel 
   2270      0    stevel 			newcanp = vfs_mergecancelopts(mop, &motbl[index]);
   2271      0    stevel 
   2272      0    stevel 			vfs_freeopt(&motbl[index]);
   2273      0    stevel 			vfs_copyopt(&imo->mo_list[i], &motbl[index]);
   2274      0    stevel 
   2275      0    stevel 			vfs_freecancelopt(motbl[index].mo_cancel);
   2276      0    stevel 			motbl[index].mo_cancel = newcanp;
   2277      0    stevel 		} else {
   2278      0    stevel 			/*
   2279      0    stevel 			 * If it's a new option, just copy it over to the first
   2280      0    stevel 			 * free location.
   2281      0    stevel 			 */
   2282      0    stevel 			vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]);
   2283      0    stevel 		}
   2284      0    stevel 	}
   2285      0    stevel 	dmo->mo_count = count;
   2286      0    stevel 	dmo->mo_list = motbl;
   2287      0    stevel }
   2288      0    stevel 
   2289      0    stevel /*
   2290      0    stevel  * Functions to set and clear mount options in a mount options table.
   2291      0    stevel  */
   2292      0    stevel 
   2293      0    stevel /*
   2294      0    stevel  * Clear a mount option, if it exists.
   2295      0    stevel  *
   2296      0    stevel  * The update_mnttab arg indicates whether mops is part of a vfs that is on
   2297      0    stevel  * the vfs list.
   2298      0    stevel  */
   2299      0    stevel static void
   2300      0    stevel vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab)
   2301      0    stevel {
   2302      0    stevel 	struct mntopt *mop;
   2303      0    stevel 	uint_t i, count;
   2304      0    stevel 
   2305      0    stevel 	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));
   2306      0    stevel 
   2307      0    stevel 	count = mops->mo_count;
   2308      0    stevel 	for (i = 0; i < count; i++) {
   2309      0    stevel 		mop = &mops->mo_list[i];
   2310      0    stevel 
   2311      0    stevel 		if (mop->mo_flags & MO_EMPTY)
   2312      0    stevel 			continue;
   2313      0    stevel 		if (strcmp(opt, mop->mo_name))
   2314      0    stevel 			continue;
   2315      0    stevel 		mop->mo_flags &= ~MO_SET;
   2316      0    stevel 		if (mop->mo_arg != NULL) {
   2317      0    stevel 			kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
   2318      0    stevel 		}
   2319      0    stevel 		mop->mo_arg = NULL;
   2320      0    stevel 		if (update_mnttab)
   2321      0    stevel 			vfs_mnttab_modtimeupd();
   2322      0    stevel 		break;
   2323      0    stevel 	}
   2324      0    stevel }
   2325      0    stevel 
   2326      0    stevel void
   2327      0    stevel vfs_clearmntopt(struct vfs *vfsp, const char *opt)
   2328      0    stevel {
   2329      0    stevel 	int gotlock = 0;
   2330      0    stevel 
   2331      0    stevel 	if (VFS_ON_LIST(vfsp)) {
   2332      0    stevel 		gotlock = 1;
   2333      0    stevel 		vfs_list_lock();
   2334      0    stevel 	}
   2335      0    stevel 	vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock);
   2336      0    stevel 	if (gotlock)
   2337      0    stevel 		vfs_list_unlock();
   2338      0    stevel }
   2339      0    stevel 
   2340      0    stevel 
   2341      0    stevel /*
   2342      0    stevel  * Set a mount option on.  If it's not found in the table, it's silently
   2343      0    stevel  * ignored.  If the option has MO_IGNORE set, it is still set unless the
   2344      0    stevel  * VFS_NOFORCEOPT bit is set in the flags.  Also, VFS_DISPLAY/VFS_NODISPLAY flag
   2345      0    stevel  * bits can be used to toggle the MO_NODISPLAY bit for the option.
   2346      0    stevel  * If the VFS_CREATEOPT flag bit is set then the first option slot with
   2347      0    stevel  * MO_EMPTY set is created as the option passed in.
   2348      0    stevel  *
   2349      0    stevel  * The update_mnttab arg indicates whether mops is part of a vfs that is on
   2350      0    stevel  * the vfs list.
   2351      0    stevel  */
   2352      0    stevel static void
   2353      0    stevel vfs_setmntopt_nolock(mntopts_t *mops, const char *opt,
   2354      0    stevel     const char *arg, int flags, int update_mnttab)
   2355      0    stevel {
   2356      0    stevel 	mntopt_t *mop;
   2357      0    stevel 	uint_t i, count;
   2358      0    stevel 	char *sp;
   2359      0    stevel 
   2360      0    stevel 	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));
   2361      0    stevel 
   2362      0    stevel 	if (flags & VFS_CREATEOPT) {
   2363      0    stevel 		if (vfs_hasopt(mops, opt) != NULL) {
   2364      0    stevel 			flags &= ~VFS_CREATEOPT;
   2365      0    stevel 		}
   2366      0    stevel 	}
   2367      0    stevel 	count = mops->mo_count;
   2368      0    stevel 	for (i = 0; i < count; i++) {
   2369      0    stevel 		mop = &mops->mo_list[i];
   2370      0    stevel 
   2371      0    stevel 		if (mop->mo_flags & MO_EMPTY) {
   2372      0    stevel 			if ((flags & VFS_CREATEOPT) == 0)
   2373      0    stevel 				continue;
   2374      0    stevel 			sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP);
   2375      0    stevel 			(void) strcpy(sp, opt);
   2376      0    stevel 			mop->mo_name = sp;
   2377      0    stevel 			if (arg != NULL)
   2378      0    stevel 				mop->mo_flags = MO_HASVALUE;
   2379      0    stevel 			else
   2380      0    stevel 				mop->mo_flags = 0;
   2381      0    stevel 		} else if (strcmp(opt, mop->mo_name)) {
   2382      0    stevel 			continue;
   2383      0    stevel 		}
   2384      0    stevel 		if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT))
   2385      0    stevel 			break;
   2386      0    stevel 		if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) {
   2387      0    stevel 			sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP);
   2388      0    stevel 			(void) strcpy(sp, arg);
   2389      0    stevel 		} else {
   2390      0    stevel 			sp = NULL;
   2391      0    stevel 		}
   2392      0    stevel 		if (mop->mo_arg != NULL)
   2393      0    stevel 			kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
   2394      0    stevel 		mop->mo_arg = sp;
   2395      0    stevel 		if (flags & VFS_DISPLAY)
   2396      0    stevel 			mop->mo_flags &= ~MO_NODISPLAY;
   2397      0    stevel 		if (flags & VFS_NODISPLAY)
   2398      0    stevel 			mop->mo_flags |= MO_NODISPLAY;
   2399      0    stevel 		mop->mo_flags |= MO_SET;
   2400      0    stevel 		if (mop->mo_cancel != NULL) {
   2401      0    stevel 			char **cp;
   2402      0    stevel 
   2403      0    stevel 			for (cp = mop->mo_cancel; *cp != NULL; cp++)
   2404      0    stevel 				vfs_clearmntopt_nolock(mops, *cp, 0);
   2405      0    stevel 		}
   2406      0    stevel 		if (update_mnttab)
   2407      0    stevel 			vfs_mnttab_modtimeupd();
   2408      0    stevel 		break;
   2409      0    stevel 	}
   2410      0    stevel }
   2411      0    stevel 
   2412      0    stevel void
   2413      0    stevel vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags)
   2414      0    stevel {
   2415      0    stevel 	int gotlock = 0;
   2416      0    stevel 
   2417      0    stevel 	if (VFS_ON_LIST(vfsp)) {
   2418      0    stevel 		gotlock = 1;
   2419      0    stevel 		vfs_list_lock();
   2420      0    stevel 	}
   2421      0    stevel 	vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock);
   2422      0    stevel 	if (gotlock)
   2423      0    stevel 		vfs_list_unlock();
   2424      0    stevel }
   2425      0    stevel 
   2426      0    stevel 
   2427      0    stevel /*
   2428      0    stevel  * Add a "tag" option to a mounted file system's options list.
   2429      0    stevel  *
   2430      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2431      0    stevel  *       to protect mops.
   2432      0    stevel  */
   2433      0    stevel static mntopt_t *
   2434      0    stevel vfs_addtag(mntopts_t *mops, const char *tag)
   2435      0    stevel {
   2436      0    stevel 	uint_t count;
   2437      0    stevel 	mntopt_t *mop, *motbl;
   2438      0    stevel 
   2439      0    stevel 	count = mops->mo_count + 1;
   2440      0    stevel 	motbl = kmem_zalloc(count * sizeof (mntopt_t), KM_SLEEP);
   2441      0    stevel 	if (mops->mo_count) {
   2442      0    stevel 		size_t len = (count - 1) * sizeof (mntopt_t);
   2443      0    stevel 
   2444      0    stevel 		bcopy(mops->mo_list, motbl, len);
   2445      0    stevel 		kmem_free(mops->mo_list, len);
   2446      0    stevel 	}
   2447      0    stevel 	mops->mo_count = count;
   2448      0    stevel 	mops->mo_list = motbl;
   2449      0    stevel 	mop = &motbl[count - 1];
   2450      0    stevel 	mop->mo_flags = MO_TAG;
   2451      0    stevel 	mop->mo_name = kmem_alloc(strlen(tag) + 1, KM_SLEEP);
   2452      0    stevel 	(void) strcpy(mop->mo_name, tag);
   2453      0    stevel 	return (mop);
   2454      0    stevel }
   2455      0    stevel 
   2456      0    stevel /*
   2457      0    stevel  * Allow users to set arbitrary "tags" in a vfs's mount options.
   2458      0    stevel  * Broader use within the kernel is discouraged.
   2459      0    stevel  */
   2460      0    stevel int
   2461      0    stevel vfs_settag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
   2462      0    stevel     cred_t *cr)
   2463      0    stevel {
   2464      0    stevel 	vfs_t *vfsp;
   2465      0    stevel 	mntopts_t *mops;
   2466      0    stevel 	mntopt_t *mop;
   2467      0    stevel 	int found = 0;
   2468      0    stevel 	dev_t dev = makedevice(major, minor);
   2469      0    stevel 	int err = 0;
   2470      0    stevel 	char *buf = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
   2471      0    stevel 
   2472      0    stevel 	/*
   2473      0    stevel 	 * Find the desired mounted file system
   2474      0    stevel 	 */
   2475      0    stevel 	vfs_list_lock();
   2476      0    stevel 	vfsp = rootvfs;
   2477      0    stevel 	do {
   2478      0    stevel 		if (vfsp->vfs_dev == dev &&
   2479      0    stevel 		    strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) {
   2480      0    stevel 			found = 1;
   2481      0    stevel 			break;
   2482      0    stevel 		}
   2483      0    stevel 		vfsp = vfsp->vfs_next;
   2484      0    stevel 	} while (vfsp != rootvfs);
   2485      0    stevel 
   2486      0    stevel 	if (!found) {
   2487      0    stevel 		err = EINVAL;
   2488      0    stevel 		goto out;
   2489      0    stevel 	}
   2490      0    stevel 	err = secpolicy_fs_config(cr, vfsp);
   2491      0    stevel 	if (err != 0)
   2492      0    stevel 		goto out;
   2493      0    stevel 
   2494      0    stevel 	mops = &vfsp->vfs_mntopts;
   2495      0    stevel 	/*
   2496      0    stevel 	 * Add tag if it doesn't already exist
   2497      0    stevel 	 */
   2498      0    stevel 	if ((mop = vfs_hasopt(mops, tag)) == NULL) {
   2499      0    stevel 		int len;
   2500      0    stevel 
   2501      0    stevel 		(void) vfs_buildoptionstr(mops, buf, MAX_MNTOPT_STR);
   2502      0    stevel 		len = strlen(buf);
   2503      0    stevel 		if (len + strlen(tag) + 2 > MAX_MNTOPT_STR) {
   2504      0    stevel 			err = ENAMETOOLONG;
   2505      0    stevel 			goto out;
   2506      0    stevel 		}
   2507      0    stevel 		mop = vfs_addtag(mops, tag);
   2508      0    stevel 	}
   2509      0    stevel 	if ((mop->mo_flags & MO_TAG) == 0) {
   2510      0    stevel 		err = EINVAL;
   2511      0    stevel 		goto out;
   2512      0    stevel 	}
   2513      0    stevel 	vfs_setmntopt_nolock(mops, tag, NULL, 0, 1);
   2514      0    stevel out:
   2515      0    stevel 	vfs_list_unlock();
   2516      0    stevel 	kmem_free(buf, MAX_MNTOPT_STR);
   2517      0    stevel 	return (err);
   2518      0    stevel }
   2519      0    stevel 
   2520      0    stevel /*
   2521      0    stevel  * Allow users to remove arbitrary "tags" in a vfs's mount options.
   2522      0    stevel  * Broader use within the kernel is discouraged.
   2523      0    stevel  */
   2524      0    stevel int
   2525      0    stevel vfs_clrtag(uint_t major, uint_t minor, const char *mntpt, const char *tag,
   2526      0    stevel     cred_t *cr)
   2527      0    stevel {
   2528      0    stevel 	vfs_t *vfsp;
   2529      0    stevel 	mntopt_t *mop;
   2530      0    stevel 	int found = 0;
   2531      0    stevel 	dev_t dev = makedevice(major, minor);
   2532      0    stevel 	int err = 0;
   2533      0    stevel 
   2534      0    stevel 	/*
   2535      0    stevel 	 * Find the desired mounted file system
   2536      0    stevel 	 */
   2537      0    stevel 	vfs_list_lock();
   2538      0    stevel 	vfsp = rootvfs;
   2539      0    stevel 	do {
   2540      0    stevel 		if (vfsp->vfs_dev == dev &&
   2541      0    stevel 		    strcmp(mntpt, refstr_value(vfsp->vfs_mntpt)) == 0) {
   2542      0    stevel 			found = 1;
   2543      0    stevel 			break;
   2544      0    stevel 		}
   2545      0    stevel 		vfsp = vfsp->vfs_next;
   2546      0    stevel 	} while (vfsp != rootvfs);
   2547      0    stevel 
   2548      0    stevel 	if (!found) {
   2549      0    stevel 		err = EINVAL;
   2550      0    stevel 		goto out;
   2551      0    stevel 	}
   2552      0    stevel 	err = secpolicy_fs_config(cr, vfsp);
   2553      0    stevel 	if (err != 0)
   2554      0    stevel 		goto out;
   2555      0    stevel 
   2556      0    stevel 	if ((mop = vfs_hasopt(&vfsp->vfs_mntopts, tag)) == NULL) {
   2557      0    stevel 		err = EINVAL;
   2558      0    stevel 		goto out;
   2559      0    stevel 	}
   2560      0    stevel 	if ((mop->mo_flags & MO_TAG) == 0) {
   2561      0    stevel 		err = EINVAL;
   2562      0    stevel 		goto out;
   2563      0    stevel 	}
   2564      0    stevel 	vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, tag, 1);
   2565      0    stevel out:
   2566      0    stevel 	vfs_list_unlock();
   2567      0    stevel 	return (err);
   2568      0    stevel }
   2569      0    stevel 
   2570      0    stevel /*
   2571      0    stevel  * Function to parse an option string and fill in a mount options table.
   2572      0    stevel  * Unknown options are silently ignored.  The input option string is modified
   2573      0    stevel  * by replacing separators with nulls.  If the create flag is set, options
   2574      0    stevel  * not found in the table are just added on the fly.  The table must have
   2575      0    stevel  * an option slot marked MO_EMPTY to add an option on the fly.
   2576      0    stevel  *
   2577      0    stevel  * This function is *not* for general use by filesystems.
   2578      0    stevel  *
   2579      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2580      0    stevel  *       to protect mops..
   2581      0    stevel  */
   2582      0    stevel void
   2583      0    stevel vfs_parsemntopts(mntopts_t *mops, char *osp, int create)
   2584      0    stevel {
   2585      0    stevel 	char *s = osp, *p, *nextop, *valp, *cp, *ep;
   2586      0    stevel 	int setflg = VFS_NOFORCEOPT;
   2587      0    stevel 
   2588      0    stevel 	if (osp == NULL)
   2589      0    stevel 		return;
   2590      0    stevel 	while (*s != '\0') {
   2591      0    stevel 		p = strchr(s, ',');	/* find next option */
   2592      0    stevel 		if (p == NULL) {
   2593      0    stevel 			cp = NULL;
   2594      0    stevel 			p = s + strlen(s);
   2595      0    stevel 		} else {
   2596      0    stevel 			cp = p;		/* save location of comma */
   2597      0    stevel 			*p++ = '\0';	/* mark end and point to next option */
   2598      0    stevel 		}
   2599      0    stevel 		nextop = p;
   2600      0    stevel 		p = strchr(s, '=');	/* look for value */
   2601      0    stevel 		if (p == NULL) {
   2602      0    stevel 			valp = NULL;	/* no value supplied */
   2603      0    stevel 		} else {
   2604      0    stevel 			ep = p;		/* save location of equals */
   2605      0    stevel 			*p++ = '\0';	/* end option and point to value */
   2606      0    stevel 			valp = p;
   2607      0    stevel 		}
   2608      0    stevel 		/*
   2609      0    stevel 		 * set option into options table
   2610      0    stevel 		 */
   2611      0    stevel 		if (create)
   2612      0    stevel 			setflg |= VFS_CREATEOPT;
   2613      0    stevel 		vfs_setmntopt_nolock(mops, s, valp, setflg, 0);
   2614      0    stevel 		if (cp != NULL)
   2615      0    stevel 			*cp = ',';	/* restore the comma */
   2616      0    stevel 		if (valp != NULL)
   2617      0    stevel 			*ep = '=';	/* restore the equals */
   2618      0    stevel 		s = nextop;
   2619      0    stevel 	}
   2620      0    stevel }
   2621      0    stevel 
   2622      0    stevel /*
   2623      0    stevel  * Function to inquire if an option exists in a mount options table.
   2624      0    stevel  * Returns a pointer to the option if it exists, else NULL.
   2625      0    stevel  *
   2626      0    stevel  * This function is *not* for general use by filesystems.
   2627      0    stevel  *
   2628      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2629      0    stevel  *       to protect mops.
   2630      0    stevel  */
   2631      0    stevel struct mntopt *
   2632      0    stevel vfs_hasopt(const mntopts_t *mops, const char *opt)
   2633      0    stevel {
   2634      0    stevel 	struct mntopt *mop;
   2635      0    stevel 	uint_t i, count;
   2636      0    stevel 
   2637      0    stevel 	count = mops->mo_count;
   2638      0    stevel 	for (i = 0; i < count; i++) {
   2639      0    stevel 		mop = &mops->mo_list[i];
   2640      0    stevel 
   2641      0    stevel 		if (mop->mo_flags & MO_EMPTY)
   2642      0    stevel 			continue;
   2643      0    stevel 		if (strcmp(opt, mop->mo_name) == 0)
   2644      0    stevel 			return (mop);
   2645      0    stevel 	}
   2646      0    stevel 	return (NULL);
   2647      0    stevel }
   2648      0    stevel 
   2649      0    stevel /*
   2650      0    stevel  * Function to inquire if an option is set in a mount options table.
   2651      0    stevel  * Returns non-zero if set and fills in the arg pointer with a pointer to
   2652      0    stevel  * the argument string or NULL if there is no argument string.
   2653      0    stevel  */
   2654      0    stevel static int
   2655      0    stevel vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp)
   2656      0    stevel {
   2657      0    stevel 	struct mntopt *mop;
   2658      0    stevel 	uint_t i, count;
   2659      0    stevel 
   2660      0    stevel 	count = mops->mo_count;
   2661      0    stevel 	for (i = 0; i < count; i++) {
   2662      0    stevel 		mop = &mops->mo_list[i];
   2663      0    stevel 
   2664      0    stevel 		if (mop->mo_flags & MO_EMPTY)
   2665      0    stevel 			continue;
   2666      0    stevel 		if (strcmp(opt, mop->mo_name))
   2667      0    stevel 			continue;
   2668      0    stevel 		if ((mop->mo_flags & MO_SET) == 0)
   2669      0    stevel 			return (0);
   2670      0    stevel 		if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0)
   2671      0    stevel 			*argp = mop->mo_arg;
   2672      0    stevel 		return (1);
   2673      0    stevel 	}
   2674      0    stevel 	return (0);
   2675      0    stevel }
   2676      0    stevel 
   2677      0    stevel 
   2678      0    stevel int
   2679      0    stevel vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp)
   2680      0    stevel {
   2681      0    stevel 	int ret;
   2682      0    stevel 
   2683      0    stevel 	vfs_list_read_lock();
   2684      0    stevel 	ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp);
   2685      0    stevel 	vfs_list_unlock();
   2686      0    stevel 	return (ret);
   2687      0    stevel }
   2688      0    stevel 
   2689      0    stevel 
   2690      0    stevel /*
   2691      0    stevel  * Construct a comma separated string of the options set in the given
   2692      0    stevel  * mount table, return the string in the given buffer.  Return non-zero if
   2693      0    stevel  * the buffer would overflow.
   2694      0    stevel  *
   2695      0    stevel  * This function is *not* for general use by filesystems.
   2696      0    stevel  *
   2697      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2698      0    stevel  *       to protect mp.
   2699      0    stevel  */
   2700      0    stevel int
   2701      0    stevel vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len)
   2702      0    stevel {
   2703      0    stevel 	char *cp;
   2704      0    stevel 	uint_t i;
   2705      0    stevel 
   2706      0    stevel 	buf[0] = '\0';
   2707      0    stevel 	cp = buf;
   2708      0    stevel 	for (i = 0; i < mp->mo_count; i++) {
   2709      0    stevel 		struct mntopt *mop;
   2710      0    stevel 
   2711      0    stevel 		mop = &mp->mo_list[i];
   2712      0    stevel 		if (mop->mo_flags & MO_SET) {
   2713      0    stevel 			int optlen, comma = 0;
   2714      0    stevel 
   2715      0    stevel 			if (buf[0] != '\0')
   2716      0    stevel 				comma = 1;
   2717      0    stevel 			optlen = strlen(mop->mo_name);
   2718      0    stevel 			if (strlen(buf) + comma + optlen + 1 > len)
   2719      0    stevel 				goto err;
   2720      0    stevel 			if (comma)
   2721      0    stevel 				*cp++ = ',';
   2722      0    stevel 			(void) strcpy(cp, mop->mo_name);
   2723      0    stevel 			cp += optlen;
   2724      0    stevel 			/*
   2725      0    stevel 			 * Append option value if there is one
   2726      0    stevel 			 */
   2727      0    stevel 			if (mop->mo_arg != NULL) {
   2728      0    stevel 				int arglen;
   2729      0    stevel 
   2730      0    stevel 				arglen = strlen(mop->mo_arg);
   2731      0    stevel 				if (strlen(buf) + arglen + 2 > len)
   2732      0    stevel 					goto err;
   2733      0    stevel 				*cp++ = '=';
   2734      0    stevel 				(void) strcpy(cp, mop->mo_arg);
   2735      0    stevel 				cp += arglen;
   2736      0    stevel 			}
   2737      0    stevel 		}
   2738      0    stevel 	}
   2739      0    stevel 	return (0);
   2740      0    stevel err:
   2741      0    stevel 	return (EOVERFLOW);
   2742      0    stevel }
   2743      0    stevel 
   2744      0    stevel static void
   2745      0    stevel vfs_freecancelopt(char **moc)
   2746      0    stevel {
   2747      0    stevel 	if (moc != NULL) {
   2748      0    stevel 		int ccnt = 0;
   2749      0    stevel 		char **cp;
   2750      0    stevel 
   2751      0    stevel 		for (cp = moc; *cp != NULL; cp++) {
   2752      0    stevel 			kmem_free(*cp, strlen(*cp) + 1);
   2753      0    stevel 			ccnt++;
   2754      0    stevel 		}
   2755      0    stevel 		kmem_free(moc, (ccnt + 1) * sizeof (char *));
   2756      0    stevel 	}
   2757      0    stevel }
   2758      0    stevel 
   2759      0    stevel static void
   2760      0    stevel vfs_freeopt(mntopt_t *mop)
   2761      0    stevel {
   2762      0    stevel 	if (mop->mo_name != NULL)
   2763      0    stevel 		kmem_free(mop->mo_name, strlen(mop->mo_name) + 1);
   2764      0    stevel 
   2765      0    stevel 	vfs_freecancelopt(mop->mo_cancel);
   2766      0    stevel 
   2767      0    stevel 	if (mop->mo_arg != NULL)
   2768      0    stevel 		kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
   2769      0    stevel }
   2770      0    stevel 
   2771      0    stevel /*
   2772      0    stevel  * Free a mount options table
   2773      0    stevel  *
   2774      0    stevel  * This function is *not* for general use by filesystems.
   2775      0    stevel  *
   2776      0    stevel  * Note: caller is responsible for locking the vfs list, if needed,
   2777      0    stevel  *       to protect mp.
   2778      0    stevel  */
   2779      0    stevel void
   2780      0    stevel vfs_freeopttbl(mntopts_t *mp)
   2781      0    stevel {
   2782      0    stevel 	uint_t i, count;
   2783      0    stevel 
   2784      0    stevel 	count = mp->mo_count;
   2785      0    stevel 	for (i = 0; i < count; i++) {
   2786      0    stevel 		vfs_freeopt(&mp->mo_list[i]);
   2787      0    stevel 	}
   2788      0    stevel 	if (count) {
   2789      0    stevel 		kmem_free(mp->mo_list, sizeof (mntopt_t) * count);
   2790      0    stevel 		mp->mo_count = 0;
   2791      0    stevel 		mp->mo_list = NULL;
   2792      0    stevel 	}
   2793      0    stevel }
   2794      0    stevel 
   2795   4863     praks 
   2796   4863     praks /* ARGSUSED */
   2797   4863     praks static int
   2798   4863     praks vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
   2799   4863     praks 	caller_context_t *ct)
   2800   4863     praks {
   2801   4863     praks 	return (0);
   2802   4863     praks }
   2803   4863     praks 
   2804   4863     praks /* ARGSUSED */
   2805   4863     praks static int
   2806   4863     praks vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
   2807   4863     praks 	caller_context_t *ct)
   2808   4863     praks {
   2809   4863     praks 	return (0);
   2810   4863     praks }
   2811   4863     praks 
   2812   4863     praks /*
   2813   4863     praks  * The dummy vnode is currently used only by file events notification
   2814   4863     praks  * module which is just interested in the timestamps.
   2815   4863     praks  */
   2816   4863     praks /* ARGSUSED */
   2817   4863     praks static int
   2818   5331       amw vfs_mntdummygetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   2819   5331       amw     caller_context_t *ct)
   2820   4863     praks {
   2821   4863     praks 	bzero(vap, sizeof (vattr_t));
   2822   4863     praks 	vap->va_type = VREG;
   2823   4863     praks 	vap->va_nlink = 1;
   2824   4863     praks 	vap->va_ctime = vfs_mnttab_ctime;
   2825   4863     praks 	/*
   2826   4863     praks 	 * it is ok to just copy mtime as the time will be monotonically
   2827   4863     praks 	 * increasing.
   2828   4863     praks 	 */
   2829   4863     praks 	vap->va_mtime = vfs_mnttab_mtime;
   2830   4863     praks 	vap->va_atime = vap->va_mtime;
   2831   4863     praks 	return (0);
   2832   4863     praks }
   2833   4863     praks 
   2834   4863     praks static void
   2835   4863     praks vfs_mnttabvp_setup(void)
   2836   4863     praks {
   2837   4863     praks 	vnode_t *tvp;
   2838   4863     praks 	vnodeops_t *vfs_mntdummyvnops;
   2839   4863     praks 	const fs_operation_def_t mnt_dummyvnodeops_template[] = {
   2840   4863     praks 		VOPNAME_READ, 		{ .vop_read = vfs_mntdummyread },
   2841   4863     praks 		VOPNAME_WRITE, 		{ .vop_write = vfs_mntdummywrite },
   2842   4863     praks 		VOPNAME_GETATTR,	{ .vop_getattr = vfs_mntdummygetattr },
   2843   4863     praks 		VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
   2844   4863     praks 		NULL,			NULL
   2845   4863     praks 	};
   2846   4863     praks 
   2847   4863     praks 	if (vn_make_ops("mnttab", mnt_dummyvnodeops_template,
   2848   4863     praks 	    &vfs_mntdummyvnops) != 0) {
   2849   4863     praks 		cmn_err(CE_WARN, "vfs_mnttabvp_setup: vn_make_ops failed");
   2850   4863     praks 		/* Shouldn't happen, but not bad enough to panic */
   2851   4863     praks 		return;
   2852   4863     praks 	}
   2853   4863     praks 
   2854   4863     praks 	/*
   2855   4863     praks 	 * A global dummy vnode is allocated to represent mntfs files.
   2856   4863     praks 	 * The mntfs file (/etc/mnttab) can be monitored for file events
   2857   4863     praks 	 * and receive an event when mnttab changes. Dummy VOP calls
   2858   4863     praks 	 * will be made on this vnode. The file events notification module
   2859   4863     praks 	 * intercepts this vnode and delivers relevant events.
   2860   4863     praks 	 */
   2861   4863     praks 	tvp = vn_alloc(KM_SLEEP);
   2862   4863     praks 	tvp->v_flag = VNOMOUNT|VNOMAP|VNOSWAP|VNOCACHE;
   2863   4863     praks 	vn_setops(tvp, vfs_mntdummyvnops);
   2864   4863     praks 	tvp->v_type = VREG;
   2865   4863     praks 	/*
   2866   4863     praks 	 * The mnt dummy ops do not reference v_data.
   2867   4863     praks 	 * No other module intercepting this vnode should either.
   2868   4863     praks 	 * Just set it to point to itself.
   2869   4863     praks 	 */
   2870   4863     praks 	tvp->v_data = (caddr_t)tvp;
   2871   4863     praks 	tvp->v_vfsp = rootvfs;
   2872   4863     praks 	vfs_mntdummyvp = tvp;
   2873   4863     praks }
   2874   4863     praks 
   2875   4863     praks /*
   2876   4863     praks  * performs fake read/write ops
   2877   4863     praks  */
   2878   4863     praks static void
   2879   4863     praks vfs_mnttab_rwop(int rw)
   2880   4863     praks {
   2881   4863     praks 	struct uio	uio;
   2882   4863     praks 	struct iovec	iov;
   2883   4863     praks 	char	buf[1];
   2884   4863     praks 
   2885   4863     praks 	if (vfs_mntdummyvp == NULL)
   2886   4863     praks 		return;
   2887   4863     praks 
   2888   4863     praks 	bzero(&uio, sizeof (uio));
   2889   4863     praks 	bzero(&iov, sizeof (iov));
   2890   4863     praks 	iov.iov_base = buf;
   2891   4863     praks 	iov.iov_len = 0;
   2892   4863     praks 	uio.uio_iov = &iov;
   2893   4863     praks 	uio.uio_iovcnt = 1;
   2894   4863     praks 	uio.uio_loffset = 0;
   2895   4863     praks 	uio.uio_segflg = UIO_SYSSPACE;
   2896   4863     praks 	uio.uio_resid = 0;
   2897   4863     praks 	if (rw) {
   2898   4863     praks 		(void) VOP_WRITE(vfs_mntdummyvp, &uio, 0, kcred, NULL);
   2899   4863     praks 	} else {
   2900   4863     praks 		(void) VOP_READ(vfs_mntdummyvp, &uio, 0, kcred, NULL);
   2901   4863     praks 	}
   2902   4863     praks }
   2903   4863     praks 
   2904   4863     praks /*
   2905   4863     praks  * Generate a write operation.
   2906   4863     praks  */
   2907   4863     praks void
   2908   4863     praks vfs_mnttab_writeop(void)
   2909   4863     praks {
   2910   4863     praks 	vfs_mnttab_rwop(1);
   2911   4863     praks }
   2912   4863     praks 
   2913   4863     praks /*
   2914   4863     praks  * Generate a read operation.
   2915   4863     praks  */
   2916   4863     praks void
   2917   4863     praks vfs_mnttab_readop(void)
   2918   4863     praks {
   2919   4863     praks 	vfs_mnttab_rwop(0);
   2920   4863     praks }
   2921   4863     praks 
   2922      0    stevel /*
   2923      0    stevel  * Free any mnttab information recorded in the vfs struct.
   2924      0    stevel  * The vfs must not be on the vfs list.
   2925      0    stevel  */
   2926      0    stevel static void
   2927      0    stevel vfs_freemnttab(struct vfs *vfsp)
   2928      0    stevel {
   2929      0    stevel 	ASSERT(!VFS_ON_LIST(vfsp));
   2930      0    stevel 
   2931      0    stevel 	/*
   2932      0    stevel 	 * Free device and mount point information
   2933      0    stevel 	 */
   2934      0    stevel 	if (vfsp->vfs_mntpt != NULL) {
   2935      0    stevel 		refstr_rele(vfsp->vfs_mntpt);
   2936      0    stevel 		vfsp->vfs_mntpt = NULL;
   2937      0    stevel 	}
   2938      0    stevel 	if (vfsp->vfs_resource != NULL) {
   2939      0    stevel 		refstr_rele(vfsp->vfs_resource);
   2940      0    stevel 		vfsp->vfs_resource = NULL;
   2941      0    stevel 	}
   2942      0    stevel 	/*
   2943      0    stevel 	 * Now free mount options information
   2944      0    stevel 	 */
   2945      0    stevel 	vfs_freeopttbl(&vfsp->vfs_mntopts);
   2946      0    stevel }
   2947      0    stevel 
   2948      0    stevel /*
   2949      0    stevel  * Return the last mnttab modification time
   2950      0    stevel  */
   2951      0    stevel void
   2952      0    stevel vfs_mnttab_modtime(timespec_t *ts)
   2953      0    stevel {
   2954      0    stevel 	ASSERT(RW_LOCK_HELD(&vfslist));
   2955      0    stevel 	*ts = vfs_mnttab_mtime;
   2956      0    stevel }
   2957      0    stevel 
   2958      0    stevel /*
   2959      0    stevel  * See if mnttab is changed
   2960      0    stevel  */
   2961      0    stevel void
   2962      0    stevel vfs_mnttab_poll(timespec_t *old, struct pollhead **phpp)
   2963      0    stevel {
   2964      0    stevel 	int changed;
   2965      0    stevel 
   2966      0    stevel 	*phpp = (struct pollhead *)NULL;
   2967      0    stevel 
   2968      0    stevel 	/*
   2969      0    stevel 	 * Note: don't grab vfs list lock before accessing vfs_mnttab_mtime.
   2970      0    stevel 	 * Can lead to deadlock against vfs_mnttab_modtimeupd(). It is safe
   2971      0    stevel 	 * to not grab the vfs list lock because tv_sec is monotonically
   2972      0    stevel 	 * increasing.
   2973      0    stevel 	 */
   2974      0    stevel 
   2975      0    stevel 	changed = (old->tv_nsec != vfs_mnttab_mtime.tv_nsec) ||
   2976      0    stevel 	    (old->tv_sec != vfs_mnttab_mtime.tv_sec);
   2977      0    stevel 	if (!changed) {
   2978      0    stevel 		*phpp = &vfs_pollhd;
   2979      0    stevel 	}
   2980      0    stevel }
   2981      0    stevel 
   2982  10910    Robert /* Provide a unique and monotonically-increasing timestamp. */
   2983  10910    Robert void
   2984  10910    Robert vfs_mono_time(timespec_t *ts)
   2985  10910    Robert {
   2986  10910    Robert 	static volatile hrtime_t hrt;		/* The saved time. */
   2987  10910    Robert 	hrtime_t	newhrt, oldhrt;		/* For effecting the CAS. */
   2988  10910    Robert 	timespec_t	newts;
   2989  10910    Robert 
   2990  11005    Robert 	/*
   2991  11005    Robert 	 * Try gethrestime() first, but be prepared to fabricate a sensible
   2992  11005    Robert 	 * answer at the first sign of any trouble.
   2993  11005    Robert 	 */
   2994  10910    Robert 	gethrestime(&newts);
   2995  10910    Robert 	newhrt = ts2hrt(&newts);
   2996  11005    Robert 	for (;;) {
   2997  10910    Robert 		oldhrt = hrt;
   2998  11005    Robert 		if (newhrt <= hrt)
   2999  11005    Robert 			newhrt = hrt + 1;
   3000  11005    Robert 		if (cas64((uint64_t *)&hrt, oldhrt, newhrt) == oldhrt)
   3001  11005    Robert 			break;
   3002  11005    Robert 	}
   3003  10910    Robert 	hrt2ts(newhrt, ts);
   3004  10910    Robert }
   3005  10910    Robert 
   3006      0    stevel /*
   3007      0    stevel  * Update the mnttab modification time and wake up any waiters for
   3008      0    stevel  * mnttab changes
   3009      0    stevel  */
   3010      0    stevel void
   3011      0    stevel vfs_mnttab_modtimeupd()
   3012      0    stevel {
   3013  11005    Robert 	hrtime_t oldhrt, newhrt;
   3014  11005    Robert 
   3015      0    stevel 	ASSERT(RW_WRITE_HELD(&vfslist));
   3016  11005    Robert 	oldhrt = ts2hrt(&vfs_mnttab_mtime);
   3017  11005    Robert 	gethrestime(&vfs_mnttab_mtime);
   3018  11005    Robert 	newhrt = ts2hrt(&vfs_mnttab_mtime);
   3019  11005    Robert 	if (oldhrt == (hrtime_t)0)
   3020      0    stevel 		vfs_mnttab_ctime = vfs_mnttab_mtime;
   3021  11005    Robert 	/*
   3022  11005    Robert 	 * Attempt to provide unique mtime (like uniqtime but not).
   3023  11005    Robert 	 */
   3024  11005    Robert 	if (newhrt == oldhrt) {
   3025  11005    Robert 		newhrt++;
   3026  11005    Robert 		hrt2ts(newhrt, &vfs_mnttab_mtime);
   3027  11005    Robert 	}
   3028      0    stevel 	pollwakeup(&vfs_pollhd, (short)POLLRDBAND);
   3029   4863     praks 	vfs_mnttab_writeop();
   3030      0    stevel }
   3031      0    stevel 
   3032      0    stevel int
   3033      0    stevel dounmount(struct vfs *vfsp, int flag, cred_t *cr)
   3034      0    stevel {
   3035      0    stevel 	vnode_t *coveredvp;
   3036      0    stevel 	int error;
   3037   1488       rsb 	extern void teardown_vopstats(vfs_t *);
   3038      0    stevel 
   3039      0    stevel 	/*
   3040      0    stevel 	 * Get covered vnode. This will be NULL if the vfs is not linked
   3041      0    stevel 	 * into the file system name space (i.e., domount() with MNT_NOSPICE).
   3042      0    stevel 	 */
   3043      0    stevel 	coveredvp = vfsp->vfs_vnodecovered;
   3044      0    stevel 	ASSERT(coveredvp == NULL || vn_vfswlock_held(coveredvp));
   3045      0    stevel 
   3046      0    stevel 	/*
   3047      0    stevel 	 * Purge all dnlc entries for this vfs.
   3048      0    stevel 	 */
   3049      0    stevel 	(void) dnlc_purge_vfsp(vfsp, 0);
   3050      0    stevel 
   3051      0    stevel 	/* For forcible umount, skip VFS_SYNC() since it may hang */
   3052      0    stevel 	if ((flag & MS_FORCE) == 0)
   3053      0    stevel 		(void) VFS_SYNC(vfsp, 0, cr);
   3054      0    stevel 
   3055      0    stevel 	/*
   3056      0    stevel 	 * Lock the vfs to maintain fs status quo during unmount.  This
   3057      0    stevel 	 * has to be done after the sync because ufs_update tries to acquire
   3058      0    stevel 	 * the vfs_reflock.
   3059      0    stevel 	 */
   3060      0    stevel 	vfs_lock_wait(vfsp);
   3061      0    stevel 
   3062      0    stevel 	if (error = VFS_UNMOUNT(vfsp, flag, cr)) {
   3063      0    stevel 		vfs_unlock(vfsp);
   3064      0    stevel 		if (coveredvp != NULL)
   3065      0    stevel 			vn_vfsunlock(coveredvp);
   3066      0    stevel 	} else if (coveredvp != NULL) {
   3067   1488       rsb 		teardown_vopstats(vfsp);
   3068      0    stevel 		/*
   3069      0    stevel 		 * vfs_remove() will do a VN_RELE(vfsp->vfs_vnodecovered)
   3070      0    stevel 		 * when it frees vfsp so we do a VN_HOLD() so we can
   3071      0    stevel 		 * continue to use coveredvp afterwards.
   3072      0    stevel 		 */
   3073      0    stevel 		VN_HOLD(coveredvp);
   3074      0    stevel 		vfs_remove(vfsp);
   3075      0    stevel 		vn_vfsunlock(coveredvp);
   3076      0    stevel 		VN_RELE(coveredvp);
   3077      0    stevel 	} else {
   3078   1488       rsb 		teardown_vopstats(vfsp);
   3079      0    stevel 		/*
   3080      0    stevel 		 * Release the reference to vfs that is not linked
   3081      0    stevel 		 * into the name space.
   3082      0    stevel 		 */
   3083      0    stevel 		vfs_unlock(vfsp);
   3084      0    stevel 		VFS_RELE(vfsp);
   3085      0    stevel 	}
   3086      0    stevel 	return (error);
   3087      0    stevel }
   3088      0    stevel 
   3089      0    stevel 
   3090      0    stevel /*
   3091      0    stevel  * Vfs_unmountall() is called by uadmin() to unmount all
   3092      0    stevel  * mounted file systems (except the root file system) during shutdown.
   3093      0    stevel  * It follows the existing locking protocol when traversing the vfs list
   3094      0    stevel  * to sync and unmount vfses. Even though there should be no
   3095      0    stevel  * other thread running while the system is shutting down, it is prudent
   3096      0    stevel  * to still follow the locking protocol.
   3097      0    stevel  */
   3098      0    stevel void
   3099      0    stevel vfs_unmountall(void)
   3100      0    stevel {
   3101      0    stevel 	struct vfs *vfsp;
   3102      0    stevel 	struct vfs *prev_vfsp = NULL;
   3103      0    stevel 	int error;
   3104      0    stevel 
   3105      0    stevel 	/*
   3106      0    stevel 	 * Toss all dnlc entries now so that the per-vfs sync
   3107      0    stevel 	 * and unmount operations don't have to slog through
   3108      0    stevel 	 * a bunch of uninteresting vnodes over and over again.
   3109      0    stevel 	 */
   3110      0    stevel 	dnlc_purge();
   3111      0    stevel 
   3112      0    stevel 	vfs_list_lock();
   3113      0    stevel 	for (vfsp = rootvfs->vfs_prev; vfsp != rootvfs; vfsp = prev_vfsp) {
   3114      0    stevel 		prev_vfsp = vfsp->vfs_prev;
   3115      0    stevel 
   3116      0    stevel 		if (vfs_lock(vfsp) != 0)
   3117      0    stevel 			continue;
   3118      0    stevel 		error = vn_vfswlock(vfsp->vfs_vnodecovered);
   3119      0    stevel 		vfs_unlock(vfsp);
   3120      0    stevel 		if (error)
   3121      0    stevel 			continue;
   3122      0    stevel 
   3123      0    stevel 		vfs_list_unlock();
   3124      0    stevel 
   3125      0    stevel 		(void) VFS_SYNC(vfsp, SYNC_CLOSE, CRED());
   3126      0    stevel 		(void) dounmount(vfsp, 0, CRED());
   3127      0    stevel 
   3128      0    stevel 		/*
   3129      0    stevel 		 * Since we dropped the vfslist lock above we must
   3130      0    stevel 		 * verify that next_vfsp still exists, else start over.
   3131      0    stevel 		 */
   3132      0    stevel 		vfs_list_lock();
   3133      0    stevel 		for (vfsp = rootvfs->vfs_prev;
   3134   3912     lling 		    vfsp != rootvfs; vfsp = vfsp->vfs_prev)
   3135      0    stevel 			if (vfsp == prev_vfsp)
   3136      0    stevel 				break;
   3137      0    stevel 		if (vfsp == rootvfs && prev_vfsp != rootvfs)
   3138      0    stevel 			prev_vfsp = rootvfs->vfs_prev;
   3139      0    stevel 	}
   3140      0    stevel 	vfs_list_unlock();
   3141      0    stevel }
   3142      0    stevel 
   3143      0    stevel /*
   3144      0    stevel  * Called to add an entry to the end of the vfs mount in progress list
   3145      0    stevel  */
   3146      0    stevel void
   3147      0    stevel vfs_addmip(dev_t dev, struct vfs *vfsp)
   3148      0    stevel {
   3149      0    stevel 	struct ipmnt *mipp;
   3150      0    stevel 
   3151      0    stevel 	mipp = (struct ipmnt *)kmem_alloc(sizeof (struct ipmnt), KM_SLEEP);
   3152      0    stevel 	mipp->mip_next = NULL;
   3153      0    stevel 	mipp->mip_dev = dev;
   3154      0    stevel 	mipp->mip_vfsp = vfsp;
   3155      0    stevel 	mutex_enter(&vfs_miplist_mutex);
   3156      0    stevel 	if (vfs_miplist_end != NULL)
   3157      0    stevel 		vfs_miplist_end->mip_next = mipp;
   3158      0    stevel 	else
   3159      0    stevel 		vfs_miplist = mipp;
   3160      0    stevel 	vfs_miplist_end = mipp;
   3161      0    stevel 	mutex_exit(&vfs_miplist_mutex);
   3162      0    stevel }
   3163      0    stevel 
   3164      0    stevel /*
   3165      0    stevel  * Called to remove an entry from the mount in progress list
   3166      0    stevel  * Either because the mount completed or it failed.
   3167      0    stevel  */
   3168      0    stevel void
   3169      0    stevel vfs_delmip(struct vfs *vfsp)
   3170      0    stevel {
   3171      0    stevel 	struct ipmnt *mipp, *mipprev;
   3172      0    stevel 
   3173      0    stevel 	mutex_enter(&vfs_miplist_mutex);
   3174      0    stevel 	mipprev = NULL;
   3175      0    stevel 	for (mipp = vfs_miplist;
   3176   3912     lling 	    mipp && mipp->mip_vfsp != vfsp; mipp = mipp->mip_next) {
   3177      0    stevel 		mipprev = mipp;
   3178      0    stevel 	}
   3179      0    stevel 	if (mipp == NULL)
   3180      0    stevel 		return; /* shouldn't happen */
   3181      0    stevel 	if (mipp == vfs_miplist_end)
   3182      0    stevel 		vfs_miplist_end = mipprev;
   3183      0    stevel 	if (mipprev == NULL)
   3184      0    stevel 		vfs_miplist = mipp->mip_next;
   3185      0    stevel 	else
   3186      0    stevel 		mipprev->mip_next = mipp->mip_next;
   3187      0    stevel 	mutex_exit(&vfs_miplist_mutex);
   3188      0    stevel 	kmem_free(mipp, sizeof (struct ipmnt));
   3189      0    stevel }
   3190      0    stevel 
   3191      0    stevel /*
   3192      0    stevel  * vfs_add is called by a specific filesystem's mount routine to add
   3193      0    stevel  * the new vfs into the vfs list/hash and to cover the mounted-on vnode.
   3194      0    stevel  * The vfs should already have been locked by the caller.
   3195      0    stevel  *
   3196      0    stevel  * coveredvp is NULL if this is the root.
   3197      0    stevel  */
   3198      0    stevel void
   3199      0    stevel vfs_add(vnode_t *coveredvp, struct vfs *vfsp, int mflag)
   3200      0    stevel {
   3201      0    stevel 	int newflag;
   3202      0    stevel 
   3203      0    stevel 	ASSERT(vfs_lock_held(vfsp));
   3204      0    stevel 	VFS_HOLD(vfsp);
   3205      0    stevel 	newflag = vfsp->vfs_flag;
   3206      0    stevel 	if (mflag & MS_RDONLY)
   3207      0    stevel 		newflag |= VFS_RDONLY;
   3208      0    stevel 	else
   3209      0    stevel 		newflag &= ~VFS_RDONLY;
   3210      0    stevel 	if (mflag & MS_NOSUID)
   3211      0    stevel 		newflag |= (VFS_NOSETUID|VFS_NODEVICES);
   3212      0    stevel 	else
   3213