Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1484  ek110237  * Common Development and Distribution License (the "License").
      6   1484  ek110237  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22   9030      Mark  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens #include <sys/types.h>
     27    789    ahrens #include <sys/param.h>
     28    789    ahrens #include <sys/systm.h>
     29    789    ahrens #include <sys/sysmacros.h>
     30    789    ahrens #include <sys/kmem.h>
     31    789    ahrens #include <sys/pathname.h>
     32    789    ahrens #include <sys/vnode.h>
     33    789    ahrens #include <sys/vfs.h>
     34   3898       rsb #include <sys/vfs_opreg.h>
     35    789    ahrens #include <sys/mntent.h>
     36    789    ahrens #include <sys/mount.h>
     37    789    ahrens #include <sys/cmn_err.h>
     38    789    ahrens #include "fs/fs_subr.h"
     39    789    ahrens #include <sys/zfs_znode.h>
     40   3461    ahrens #include <sys/zfs_dir.h>
     41    789    ahrens #include <sys/zil.h>
     42    789    ahrens #include <sys/fs/zfs.h>
     43    789    ahrens #include <sys/dmu.h>
     44    789    ahrens #include <sys/dsl_prop.h>
     45   3912     lling #include <sys/dsl_dataset.h>
     46   4543     marks #include <sys/dsl_deleg.h>
     47    789    ahrens #include <sys/spa.h>
     48    789    ahrens #include <sys/zap.h>
     49    789    ahrens #include <sys/varargs.h>
     50    789    ahrens #include <sys/policy.h>
     51    789    ahrens #include <sys/atomic.h>
     52    789    ahrens #include <sys/mkdev.h>
     53    789    ahrens #include <sys/modctl.h>
     54   4543     marks #include <sys/refstr.h>
     55    789    ahrens #include <sys/zfs_ioctl.h>
     56    789    ahrens #include <sys/zfs_ctldir.h>
     57   5331       amw #include <sys/zfs_fuid.h>
     58   1544  eschrock #include <sys/bootconf.h>
     59    849   bonwick #include <sys/sunddi.h>
     60   1484  ek110237 #include <sys/dnlc.h>
     61   5326  ek110237 #include <sys/dmu_objset.h>
     62   6423   gw25295 #include <sys/spa_boot.h>
     63    789    ahrens 
     64    789    ahrens int zfsfstype;
     65    789    ahrens vfsops_t *zfs_vfsops = NULL;
     66    849   bonwick static major_t zfs_major;
     67    789    ahrens static minor_t zfs_minor;
     68    789    ahrens static kmutex_t	zfs_dev_mtx;
     69   9234    George 
     70   9234    George extern int sys_shutdown;
     71   1544  eschrock 
     72    789    ahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
     73    789    ahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
     74   1544  eschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
     75    789    ahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
     76    789    ahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
     77    789    ahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
     78    789    ahrens static void zfs_freevfs(vfs_t *vfsp);
     79    789    ahrens 
     80    789    ahrens static const fs_operation_def_t zfs_vfsops_template[] = {
     81   3898       rsb 	VFSNAME_MOUNT,		{ .vfs_mount = zfs_mount },
     82   3898       rsb 	VFSNAME_MOUNTROOT,	{ .vfs_mountroot = zfs_mountroot },
     83   3898       rsb 	VFSNAME_UNMOUNT,	{ .vfs_unmount = zfs_umount },
     84   3898       rsb 	VFSNAME_ROOT,		{ .vfs_root = zfs_root },
     85   3898       rsb 	VFSNAME_STATVFS,	{ .vfs_statvfs = zfs_statvfs },
     86   3898       rsb 	VFSNAME_SYNC,		{ .vfs_sync = zfs_sync },
     87   3898       rsb 	VFSNAME_VGET,		{ .vfs_vget = zfs_vget },
     88   3898       rsb 	VFSNAME_FREEVFS,	{ .vfs_freevfs = zfs_freevfs },
     89   3898       rsb 	NULL,			NULL
     90    789    ahrens };
     91    789    ahrens 
     92    789    ahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = {
     93   3898       rsb 	VFSNAME_FREEVFS,	{ .vfs_freevfs =  zfs_freevfs },
     94   3898       rsb 	NULL,			NULL
     95    789    ahrens };
     96    789    ahrens 
     97    789    ahrens /*
     98    789    ahrens  * We need to keep a count of active fs's.
     99    789    ahrens  * This is necessary to prevent our module
    100    789    ahrens  * from being unloaded after a umount -f
    101    789    ahrens  */
    102    789    ahrens static uint32_t	zfs_active_fs_count = 0;
    103    789    ahrens 
    104    789    ahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
    105    789    ahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
    106   3234  ck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
    107   3234  ck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
    108    789    ahrens 
    109   3234  ck153898 /*
    110   4596     lling  * MO_DEFAULT is not used since the default value is determined
    111   4596     lling  * by the equivalent property.
    112   3234  ck153898  */
    113    789    ahrens static mntopt_t mntopts[] = {
    114   3234  ck153898 	{ MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
    115   3234  ck153898 	{ MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
    116   4596     lling 	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
    117    789    ahrens 	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
    118    789    ahrens };
    119    789    ahrens 
    120    789    ahrens static mntopts_t zfs_mntopts = {
    121    789    ahrens 	sizeof (mntopts) / sizeof (mntopt_t),
    122    789    ahrens 	mntopts
    123    789    ahrens };
    124    789    ahrens 
    125    789    ahrens /*ARGSUSED*/
    126    789    ahrens int
    127    789    ahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
    128    789    ahrens {
    129    789    ahrens 	/*
    130    789    ahrens 	 * Data integrity is job one.  We don't want a compromised kernel
    131    789    ahrens 	 * writing to the storage pool, so we never sync during panic.
    132    789    ahrens 	 */
    133    789    ahrens 	if (panicstr)
    134    789    ahrens 		return (0);
    135    789    ahrens 
    136    789    ahrens 	/*
    137    789    ahrens 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
    138    789    ahrens 	 * to sync metadata, which they would otherwise cache indefinitely.
    139    789    ahrens 	 * Semantically, the only requirement is that the sync be initiated.
    140    789    ahrens 	 * The DMU syncs out txgs frequently, so there's nothing to do.
    141    789    ahrens 	 */
    142    789    ahrens 	if (flag & SYNC_ATTR)
    143    789    ahrens 		return (0);
    144    789    ahrens 
    145    789    ahrens 	if (vfsp != NULL) {
    146    789    ahrens 		/*
    147    789    ahrens 		 * Sync a specific filesystem.
    148    789    ahrens 		 */
    149    789    ahrens 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
    150   9234    George 		dsl_pool_t *dp;
    151    789    ahrens 
    152    789    ahrens 		ZFS_ENTER(zfsvfs);
    153   9234    George 		dp = dmu_objset_pool(zfsvfs->z_os);
    154   9234    George 
    155   9234    George 		/*
    156   9234    George 		 * If the system is shutting down, then skip any
    157   9234    George 		 * filesystems which may exist on a suspended pool.
    158   9234    George 		 */
    159   9234    George 		if (sys_shutdown && spa_suspended(dp->dp_spa)) {
    160   9234    George 			ZFS_EXIT(zfsvfs);
    161   9234    George 			return (0);
    162   9234    George 		}
    163   9234    George 
    164    789    ahrens 		if (zfsvfs->z_log != NULL)
    165   2638    perrin 			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
    166    789    ahrens 		else
    167   9234    George 			txg_wait_synced(dp, 0);
    168    789    ahrens 		ZFS_EXIT(zfsvfs);
    169    789    ahrens 	} else {
    170    789    ahrens 		/*
    171    789    ahrens 		 * Sync all ZFS filesystems.  This is what happens when you
    172    789    ahrens 		 * run sync(1M).  Unlike other filesystems, ZFS honors the
    173    789    ahrens 		 * request by waiting for all pools to commit all dirty data.
    174    789    ahrens 		 */
    175    789    ahrens 		spa_sync_allpools();
    176    789    ahrens 	}
    177   1544  eschrock 
    178   1544  eschrock 	return (0);
    179   1544  eschrock }
    180   1544  eschrock 
    181   1544  eschrock static int
    182   1544  eschrock zfs_create_unique_device(dev_t *dev)
    183   1544  eschrock {
    184   1544  eschrock 	major_t new_major;
    185   1544  eschrock 
    186   1544  eschrock 	do {
    187   1544  eschrock 		ASSERT3U(zfs_minor, <=, MAXMIN32);
    188   1544  eschrock 		minor_t start = zfs_minor;
    189   1544  eschrock 		do {
    190   1544  eschrock 			mutex_enter(&zfs_dev_mtx);
    191   1544  eschrock 			if (zfs_minor >= MAXMIN32) {
    192   1544  eschrock 				/*
    193   1544  eschrock 				 * If we're still using the real major
    194   1544  eschrock 				 * keep out of /dev/zfs and /dev/zvol minor
    195   1544  eschrock 				 * number space.  If we're using a getudev()'ed
    196   1544  eschrock 				 * major number, we can use all of its minors.
    197   1544  eschrock 				 */
    198   1544  eschrock 				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
    199   1544  eschrock 					zfs_minor = ZFS_MIN_MINOR;
    200   1544  eschrock 				else
    201   1544  eschrock 					zfs_minor = 0;
    202   1544  eschrock 			} else {
    203   1544  eschrock 				zfs_minor++;
    204   1544  eschrock 			}
    205   1544  eschrock 			*dev = makedevice(zfs_major, zfs_minor);
    206   1544  eschrock 			mutex_exit(&zfs_dev_mtx);
    207   1544  eschrock 		} while (vfs_devismounted(*dev) && zfs_minor != start);
    208   1544  eschrock 		if (zfs_minor == start) {
    209   1544  eschrock 			/*
    210   1544  eschrock 			 * We are using all ~262,000 minor numbers for the
    211   1544  eschrock 			 * current major number.  Create a new major number.
    212   1544  eschrock 			 */
    213   1544  eschrock 			if ((new_major = getudev()) == (major_t)-1) {
    214   1544  eschrock 				cmn_err(CE_WARN,
    215   1544  eschrock 				    "zfs_mount: Can't get unique major "
    216   1544  eschrock 				    "device number.");
    217   1544  eschrock 				return (-1);
    218   1544  eschrock 			}
    219   1544  eschrock 			mutex_enter(&zfs_dev_mtx);
    220   1544  eschrock 			zfs_major = new_major;
    221   1544  eschrock 			zfs_minor = 0;
    222   1544  eschrock 
    223   1544  eschrock 			mutex_exit(&zfs_dev_mtx);
    224   1544  eschrock 		} else {
    225   1544  eschrock 			break;
    226   1544  eschrock 		}
    227   1544  eschrock 		/* CONSTANTCONDITION */
    228   1544  eschrock 	} while (1);
    229    789    ahrens 
    230    789    ahrens 	return (0);
    231    789    ahrens }
    232    789    ahrens 
    233    789    ahrens static void
    234    789    ahrens atime_changed_cb(void *arg, uint64_t newval)
    235    789    ahrens {
    236    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    237    789    ahrens 
    238    789    ahrens 	if (newval == TRUE) {
    239    789    ahrens 		zfsvfs->z_atime = TRUE;
    240    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
    241    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
    242    789    ahrens 	} else {
    243    789    ahrens 		zfsvfs->z_atime = FALSE;
    244    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
    245    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
    246   3234  ck153898 	}
    247   3234  ck153898 }
    248   3234  ck153898 
    249   3234  ck153898 static void
    250   3234  ck153898 xattr_changed_cb(void *arg, uint64_t newval)
    251   3234  ck153898 {
    252   3234  ck153898 	zfsvfs_t *zfsvfs = arg;
    253   3234  ck153898 
    254   3234  ck153898 	if (newval == TRUE) {
    255   3234  ck153898 		/* XXX locking on vfs_flag? */
    256   3234  ck153898 		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
    257   3234  ck153898 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
    258   3234  ck153898 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
    259   3234  ck153898 	} else {
    260   3234  ck153898 		/* XXX locking on vfs_flag? */
    261   3234  ck153898 		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
    262   3234  ck153898 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
    263   3234  ck153898 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
    264    789    ahrens 	}
    265    789    ahrens }
    266    789    ahrens 
    267    789    ahrens static void
    268    789    ahrens blksz_changed_cb(void *arg, uint64_t newval)
    269    789    ahrens {
    270    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    271    789    ahrens 
    272    789    ahrens 	if (newval < SPA_MINBLOCKSIZE ||
    273    789    ahrens 	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
    274    789    ahrens 		newval = SPA_MAXBLOCKSIZE;
    275    789    ahrens 
    276    789    ahrens 	zfsvfs->z_max_blksz = newval;
    277    789    ahrens 	zfsvfs->z_vfs->vfs_bsize = newval;
    278    789    ahrens }
    279    789    ahrens 
    280    789    ahrens static void
    281    789    ahrens readonly_changed_cb(void *arg, uint64_t newval)
    282    789    ahrens {
    283    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    284    789    ahrens 
    285    789    ahrens 	if (newval) {
    286    789    ahrens 		/* XXX locking on vfs_flag? */
    287    789    ahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
    288    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
    289    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
    290    789    ahrens 	} else {
    291    789    ahrens 		/* XXX locking on vfs_flag? */
    292    789    ahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
    293    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
    294    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
    295    789    ahrens 	}
    296    789    ahrens }
    297    789    ahrens 
    298    789    ahrens static void
    299    789    ahrens devices_changed_cb(void *arg, uint64_t newval)
    300    789    ahrens {
    301    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    302    789    ahrens 
    303    789    ahrens 	if (newval == FALSE) {
    304    789    ahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
    305    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
    306    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
    307    789    ahrens 	} else {
    308    789    ahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
    309    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
    310    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
    311    789    ahrens 	}
    312    789    ahrens }
    313    789    ahrens 
    314    789    ahrens static void
    315    789    ahrens setuid_changed_cb(void *arg, uint64_t newval)
    316    789    ahrens {
    317    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    318    789    ahrens 
    319    789    ahrens 	if (newval == FALSE) {
    320    789    ahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
    321    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
    322    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
    323    789    ahrens 	} else {
    324    789    ahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
    325    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
    326    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
    327    789    ahrens 	}
    328    789    ahrens }
    329    789    ahrens 
    330    789    ahrens static void
    331    789    ahrens exec_changed_cb(void *arg, uint64_t newval)
    332    789    ahrens {
    333    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    334    789    ahrens 
    335    789    ahrens 	if (newval == FALSE) {
    336    789    ahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
    337    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
    338    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
    339    789    ahrens 	} else {
    340    789    ahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
    341    789    ahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
    342    789    ahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
    343    789    ahrens 	}
    344    789    ahrens }
    345    789    ahrens 
    346   5331       amw /*
    347   5331       amw  * The nbmand mount option can be changed at mount time.
    348   5331       amw  * We can't allow it to be toggled on live file systems or incorrect
    349   5331       amw  * behavior may be seen from cifs clients
    350   5331       amw  *
    351   5331       amw  * This property isn't registered via dsl_prop_register(), but this callback
    352   5331       amw  * will be called when a file system is first mounted
    353   5331       amw  */
    354   5331       amw static void
    355   5331       amw nbmand_changed_cb(void *arg, uint64_t newval)
    356   5331       amw {
    357   5331       amw 	zfsvfs_t *zfsvfs = arg;
    358   5331       amw 	if (newval == FALSE) {
    359   5331       amw 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
    360   5331       amw 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
    361   5331       amw 	} else {
    362   5331       amw 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
    363   5331       amw 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
    364   5331       amw 	}
    365   5331       amw }
    366   5331       amw 
    367    789    ahrens static void
    368    789    ahrens snapdir_changed_cb(void *arg, uint64_t newval)
    369    789    ahrens {
    370    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    371    789    ahrens 
    372    789    ahrens 	zfsvfs->z_show_ctldir = newval;
    373   5331       amw }
    374   5331       amw 
    375   5331       amw static void
    376   5331       amw vscan_changed_cb(void *arg, uint64_t newval)
    377   5331       amw {
    378   5331       amw 	zfsvfs_t *zfsvfs = arg;
    379   5331       amw 
    380   5331       amw 	zfsvfs->z_vscan = newval;
    381    789    ahrens }
    382    789    ahrens 
    383    789    ahrens static void
    384    789    ahrens acl_mode_changed_cb(void *arg, uint64_t newval)
    385    789    ahrens {
    386    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    387    789    ahrens 
    388    789    ahrens 	zfsvfs->z_acl_mode = newval;
    389    789    ahrens }
    390    789    ahrens 
    391    789    ahrens static void
    392    789    ahrens acl_inherit_changed_cb(void *arg, uint64_t newval)
    393    789    ahrens {
    394    789    ahrens 	zfsvfs_t *zfsvfs = arg;
    395    789    ahrens 
    396    789    ahrens 	zfsvfs->z_acl_inherit = newval;
    397    789    ahrens }
    398    789    ahrens 
    399   1544  eschrock static int
    400   1544  eschrock zfs_register_callbacks(vfs_t *vfsp)
    401   1544  eschrock {
    402   1544  eschrock 	struct dsl_dataset *ds = NULL;
    403   1544  eschrock 	objset_t *os = NULL;
    404   1544  eschrock 	zfsvfs_t *zfsvfs = NULL;
    405   5331       amw 	uint64_t nbmand;
    406   5331       amw 	int readonly, do_readonly = B_FALSE;
    407   5331       amw 	int setuid, do_setuid = B_FALSE;
    408   5331       amw 	int exec, do_exec = B_FALSE;
    409   5331       amw 	int devices, do_devices = B_FALSE;
    410   5331       amw 	int xattr, do_xattr = B_FALSE;
    411   5331       amw 	int atime, do_atime = B_FALSE;
    412   1544  eschrock 	int error = 0;
    413   1544  eschrock 
    414   1544  eschrock 	ASSERT(vfsp);
    415   1544  eschrock 	zfsvfs = vfsp->vfs_data;
    416   1544  eschrock 	ASSERT(zfsvfs);
    417   1544  eschrock 	os = zfsvfs->z_os;
    418   1544  eschrock 
    419   1544  eschrock 	/*
    420   1544  eschrock 	 * The act of registering our callbacks will destroy any mount
    421   1544  eschrock 	 * options we may have.  In order to enable temporary overrides
    422   3234  ck153898 	 * of mount options, we stash away the current values and
    423   1544  eschrock 	 * restore them after we register the callbacks.
    424   1544  eschrock 	 */
    425   1544  eschrock 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
    426   1544  eschrock 		readonly = B_TRUE;
    427   1544  eschrock 		do_readonly = B_TRUE;
    428   1544  eschrock 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
    429   1544  eschrock 		readonly = B_FALSE;
    430   1544  eschrock 		do_readonly = B_TRUE;
    431   1544  eschrock 	}
    432   1544  eschrock 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
    433   1544  eschrock 		devices = B_FALSE;
    434   1544  eschrock 		setuid = B_FALSE;
    435   1544  eschrock 		do_devices = B_TRUE;
    436   1544  eschrock 		do_setuid = B_TRUE;
    437   1544  eschrock 	} else {
    438   1544  eschrock 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
    439   1544  eschrock 			devices = B_FALSE;
    440   1544  eschrock 			do_devices = B_TRUE;
    441   3912     lling 		} else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
    442   1544  eschrock 			devices = B_TRUE;
    443   1544  eschrock 			do_devices = B_TRUE;
    444   1544  eschrock 		}
    445   1544  eschrock 
    446   1544  eschrock 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
    447   1544  eschrock 			setuid = B_FALSE;
    448   1544  eschrock 			do_setuid = B_TRUE;
    449   1544  eschrock 		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
    450   1544  eschrock 			setuid = B_TRUE;
    451   1544  eschrock 			do_setuid = B_TRUE;
    452   1544  eschrock 		}
    453   1544  eschrock 	}
    454   1544  eschrock 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
    455   1544  eschrock 		exec = B_FALSE;
    456   1544  eschrock 		do_exec = B_TRUE;
    457   1544  eschrock 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
    458   1544  eschrock 		exec = B_TRUE;
    459   1544  eschrock 		do_exec = B_TRUE;
    460   1544  eschrock 	}
    461   3234  ck153898 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
    462   3234  ck153898 		xattr = B_FALSE;
    463   3234  ck153898 		do_xattr = B_TRUE;
    464   3234  ck153898 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
    465   3234  ck153898 		xattr = B_TRUE;
    466   3234  ck153898 		do_xattr = B_TRUE;
    467   3234  ck153898 	}
    468   4596     lling 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
    469   4596     lling 		atime = B_FALSE;
    470   4596     lling 		do_atime = B_TRUE;
    471   4596     lling 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
    472   4596     lling 		atime = B_TRUE;
    473   4596     lling 		do_atime = B_TRUE;
    474   4596     lling 	}
    475   1544  eschrock 
    476   1544  eschrock 	/*
    477   5331       amw 	 * nbmand is a special property.  It can only be changed at
    478   5331       amw 	 * mount time.
    479   5331       amw 	 *
    480   5331       amw 	 * This is weird, but it is documented to only be changeable
    481   5331       amw 	 * at mount time.
    482   5331       amw 	 */
    483   5331       amw 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
    484   5331       amw 		nbmand = B_FALSE;
    485   5331       amw 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
    486   5331       amw 		nbmand = B_TRUE;
    487   5331       amw 	} else {
    488   5331       amw 		char osname[MAXNAMELEN];
    489   5331       amw 
    490   5331       amw 		dmu_objset_name(os, osname);
    491   5331       amw 		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
    492   7265    ahrens 		    NULL)) {
    493   7265    ahrens 			return (error);
    494   7265    ahrens 		}
    495   5331       amw 	}
    496   5331       amw 
    497   5331       amw 	/*
    498   1544  eschrock 	 * Register property callbacks.
    499   1544  eschrock 	 *
    500   1544  eschrock 	 * It would probably be fine to just check for i/o error from
    501   1544  eschrock 	 * the first prop_register(), but I guess I like to go
    502   1544  eschrock 	 * overboard...
    503   1544  eschrock 	 */
    504   1544  eschrock 	ds = dmu_objset_ds(os);
    505   1544  eschrock 	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
    506   3234  ck153898 	error = error ? error : dsl_prop_register(ds,
    507   3234  ck153898 	    "xattr", xattr_changed_cb, zfsvfs);
    508   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    509   1544  eschrock 	    "recordsize", blksz_changed_cb, zfsvfs);
    510   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    511   1544  eschrock 	    "readonly", readonly_changed_cb, zfsvfs);
    512   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    513   1544  eschrock 	    "devices", devices_changed_cb, zfsvfs);
    514   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    515   1544  eschrock 	    "setuid", setuid_changed_cb, zfsvfs);
    516   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    517   1544  eschrock 	    "exec", exec_changed_cb, zfsvfs);
    518   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    519   1544  eschrock 	    "snapdir", snapdir_changed_cb, zfsvfs);
    520   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    521   1544  eschrock 	    "aclmode", acl_mode_changed_cb, zfsvfs);
    522   1544  eschrock 	error = error ? error : dsl_prop_register(ds,
    523   1544  eschrock 	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
    524   5331       amw 	error = error ? error : dsl_prop_register(ds,
    525   5331       amw 	    "vscan", vscan_changed_cb, zfsvfs);
    526   1544  eschrock 	if (error)
    527   1544  eschrock 		goto unregister;
    528   1544  eschrock 
    529   1544  eschrock 	/*
    530   1544  eschrock 	 * Invoke our callbacks to restore temporary mount options.
    531   1544  eschrock 	 */
    532   1544  eschrock 	if (do_readonly)
    533   1544  eschrock 		readonly_changed_cb(zfsvfs, readonly);
    534   1544  eschrock 	if (do_setuid)
    535   1544  eschrock 		setuid_changed_cb(zfsvfs, setuid);
    536   1544  eschrock 	if (do_exec)
    537   1544  eschrock 		exec_changed_cb(zfsvfs, exec);
    538   1544  eschrock 	if (do_devices)
    539   1544  eschrock 		devices_changed_cb(zfsvfs, devices);
    540   3234  ck153898 	if (do_xattr)
    541   3234  ck153898 		xattr_changed_cb(zfsvfs, xattr);
    542   4596     lling 	if (do_atime)
    543   4596     lling 		atime_changed_cb(zfsvfs, atime);
    544   5331       amw 
    545   5331       amw 	nbmand_changed_cb(zfsvfs, nbmand);
    546   1544  eschrock 
    547   1544  eschrock 	return (0);
    548   1544  eschrock 
    549   1544  eschrock unregister:
    550   1544  eschrock 	/*
    551   1544  eschrock 	 * We may attempt to unregister some callbacks that are not
    552   1544  eschrock 	 * registered, but this is OK; it will simply return ENOMSG,
    553   1544  eschrock 	 * which we will ignore.
    554   1544  eschrock 	 */
    555   1544  eschrock 	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
    556   3234  ck153898 	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
    557   1544  eschrock 	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
    558   1544  eschrock 	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
    559   1544  eschrock 	(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
    560   1544  eschrock 	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
    561   1544  eschrock 	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
    562   1544  eschrock 	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
    563   1544  eschrock 	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
    564   1544  eschrock 	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
    565   1544  eschrock 	    zfsvfs);
    566   5331       amw 	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
    567   1544  eschrock 	return (error);
    568   1544  eschrock 
    569   1544  eschrock }
    570   1544  eschrock 
    571   9396   Matthew static void
    572   9396   Matthew uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
    573   9396   Matthew     int64_t delta, dmu_tx_t *tx)
    574   9396   Matthew {
    575   9396   Matthew 	uint64_t used = 0;
    576   9396   Matthew 	char buf[32];
    577   9396   Matthew 	int err;
    578   9396   Matthew 	uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
    579   9396   Matthew 
    580   9396   Matthew 	if (delta == 0)
    581   9396   Matthew 		return;
    582   9396   Matthew 
    583   9396   Matthew 	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
    584   9396   Matthew 	err = zap_lookup(os, obj, buf, 8, 1, &used);
    585   9396   Matthew 	ASSERT(err == 0 || err == ENOENT);
    586   9396   Matthew 	/* no underflow/overflow */
    587   9396   Matthew 	ASSERT(delta > 0 || used >= -delta);
    588   9396   Matthew 	ASSERT(delta < 0 || used + delta > used);
    589   9396   Matthew 	used += delta;
    590   9396   Matthew 	if (used == 0)
    591   9396   Matthew 		err = zap_remove(os, obj, buf, tx);
    592   9396   Matthew 	else
    593   9396   Matthew 		err = zap_update(os, obj, buf, 8, 1, &used, tx);
    594   9396   Matthew 	ASSERT(err == 0);
    595   9396   Matthew }
    596   9396   Matthew 
    597  10407   Matthew static int
    598  10407   Matthew zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus,
    599  10407   Matthew     uint64_t *userp, uint64_t *groupp)
    600   9396   Matthew {
    601  10407   Matthew 	znode_phys_t *znp = bonus;
    602   9396   Matthew 
    603   9396   Matthew 	if (bonustype != DMU_OT_ZNODE)
    604  10407   Matthew 		return (ENOENT);
    605   9396   Matthew 
    606  10407   Matthew 	*userp = znp->zp_uid;
    607  10407   Matthew 	*groupp = znp->zp_gid;
    608  10407   Matthew 	return (0);
    609   9396   Matthew }
    610   9396   Matthew 
    611   9396   Matthew static void
    612   9396   Matthew fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
    613   9396   Matthew     char *domainbuf, int buflen, uid_t *ridp)
    614   9396   Matthew {
    615   9396   Matthew 	uint64_t fuid;
    616   9396   Matthew 	const char *domain;
    617   9396   Matthew 
    618   9396   Matthew 	fuid = strtonum(fuidstr, NULL);
    619   9396   Matthew 
    620   9396   Matthew 	domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
    621   9396   Matthew 	if (domain)
    622   9396   Matthew 		(void) strlcpy(domainbuf, domain, buflen);
    623   9396   Matthew 	else
    624   9396   Matthew 		domainbuf[0] = '\0';
    625   9396   Matthew 	*ridp = FUID_RID(fuid);
    626   9396   Matthew }
    627   9396   Matthew 
    628   9396   Matthew static uint64_t
    629   9396   Matthew zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
    630   9396   Matthew {
    631   9396   Matthew 	switch (type) {
    632   9396   Matthew 	case ZFS_PROP_USERUSED:
    633   9396   Matthew 		return (DMU_USERUSED_OBJECT);
    634   9396   Matthew 	case ZFS_PROP_GROUPUSED:
    635   9396   Matthew 		return (DMU_GROUPUSED_OBJECT);
    636   9396   Matthew 	case ZFS_PROP_USERQUOTA:
    637   9396   Matthew 		return (zfsvfs->z_userquota_obj);
    638   9396   Matthew 	case ZFS_PROP_GROUPQUOTA:
    639   9396   Matthew 		return (zfsvfs->z_groupquota_obj);
    640   9396   Matthew 	}
    641   9396   Matthew 	return (0);
    642   9396   Matthew }
    643   9396   Matthew 
    644   9396   Matthew int
    645   9396   Matthew zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
    646   9396   Matthew     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
    647   9396   Matthew {
    648   9396   Matthew 	int error;
    649   9396   Matthew 	zap_cursor_t zc;
    650   9396   Matthew 	zap_attribute_t za;
    651   9396   Matthew 	zfs_useracct_t *buf = vbuf;
    652   9396   Matthew 	uint64_t obj;
    653   9396   Matthew 
    654   9396   Matthew 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
    655   9396   Matthew 		return (ENOTSUP);
    656   9396   Matthew 
    657   9396   Matthew 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
    658   9396   Matthew 	if (obj == 0) {
    659   9396   Matthew 		*bufsizep = 0;
    660   9396   Matthew 		return (0);
    661   9396   Matthew 	}
    662   9396   Matthew 
    663   9396   Matthew 	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
    664   9396   Matthew 	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
    665   9396   Matthew 	    zap_cursor_advance(&zc)) {
    666   9396   Matthew 		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
    667   9396   Matthew 		    *bufsizep)
    668   9396   Matthew 			break;
    669   9396   Matthew 
    670   9396   Matthew 		fuidstr_to_sid(zfsvfs, za.za_name,
    671   9396   Matthew 		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
    672   9396   Matthew 
    673   9396   Matthew 		buf->zu_space = za.za_first_integer;
    674   9396   Matthew 		buf++;
    675   9396   Matthew 	}
    676   9396   Matthew 	if (error == ENOENT)
    677   9396   Matthew 		error = 0;
    678   9396   Matthew 
    679   9396   Matthew 	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
    680   9396   Matthew 	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
    681   9396   Matthew 	*cookiep = zap_cursor_serialize(&zc);
    682   9396   Matthew 	zap_cursor_fini(&zc);
    683   9396   Matthew 	return (error);
    684   9396   Matthew }
    685   9396   Matthew 
    686   9396   Matthew /*
    687   9396   Matthew  * buf must be big enough (eg, 32 bytes)
    688   9396   Matthew  */
    689   9396   Matthew static int
    690   9396   Matthew id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
    691   9396   Matthew     char *buf, boolean_t addok)
    692   9396   Matthew {
    693   9396   Matthew 	uint64_t fuid;
    694   9396   Matthew 	int domainid = 0;
    695   9396   Matthew 
    696   9396   Matthew 	if (domain && domain[0]) {
    697   9396   Matthew 		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
    698   9396   Matthew 		if (domainid == -1)
    699   9396   Matthew 			return (ENOENT);
    700   9396   Matthew 	}
    701   9396   Matthew 	fuid = FUID_ENCODE(domainid, rid);
    702   9396   Matthew 	(void) sprintf(buf, "%llx", (longlong_t)fuid);
    703   9396   Matthew 	return (0);
    704   9396   Matthew }
    705   9396   Matthew 
    706   9396   Matthew int
    707   9396   Matthew zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
    708   9396   Matthew     const char *domain, uint64_t rid, uint64_t *valp)
    709   9396   Matthew {
    710   9396   Matthew 	char buf[32];
    711   9396   Matthew 	int err;
    712   9396   Matthew 	uint64_t obj;
    713   9396   Matthew 
    714   9396   Matthew 	*valp = 0;
    715   9396   Matthew 
    716   9396   Matthew 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
    717   9396   Matthew 		return (ENOTSUP);
    718   9396   Matthew 
    719   9396   Matthew 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
    720   9396   Matthew 	if (obj == 0)
    721   9396   Matthew 		return (0);
    722   9396   Matthew 
    723   9396   Matthew 	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
    724   9396   Matthew 	if (err)
    725   9396   Matthew 		return (err);
    726   9396   Matthew 
    727   9396   Matthew 	err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
    728   9396   Matthew 	if (err == ENOENT)
    729   9396   Matthew 		err = 0;
    730   9396   Matthew 	return (err);
    731   9396   Matthew }
    732   9396   Matthew 
    733   9396   Matthew int
    734   9396   Matthew zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
    735   9396   Matthew     const char *domain, uint64_t rid, uint64_t quota)
    736   9396   Matthew {
    737   9396   Matthew 	char buf[32];
    738   9396   Matthew 	int err;
    739   9396   Matthew 	dmu_tx_t *tx;
    740   9396   Matthew 	uint64_t *objp;
    741   9396   Matthew 	boolean_t fuid_dirtied;
    742   9396   Matthew 
    743   9396   Matthew 	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
    744   9396   Matthew 		return (EINVAL);
    745   9396   Matthew 
    746   9396   Matthew 	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
    747   9396   Matthew 		return (ENOTSUP);
    748   9396   Matthew 
    749   9396   Matthew 	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
    750   9396   Matthew 	    &zfsvfs->z_groupquota_obj;
    751   9396   Matthew 
    752   9396   Matthew 	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
    753   9396   Matthew 	if (err)
    754   9396   Matthew 		return (err);
    755   9396   Matthew 	fuid_dirtied = zfsvfs->z_fuid_dirty;
    756   9396   Matthew 
    757   9396   Matthew 	tx = dmu_tx_create(zfsvfs->z_os);
    758   9396   Matthew 	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
    759   9396   Matthew 	if (*objp == 0) {
    760   9396   Matthew 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
    761   9396   Matthew 		    zfs_userquota_prop_prefixes[type]);
    762   9396   Matthew 	}
    763   9396   Matthew 	if (fuid_dirtied)
    764   9396   Matthew 		zfs_fuid_txhold(zfsvfs, tx);
    765   9396   Matthew 	err = dmu_tx_assign(tx, TXG_WAIT);
    766   9396   Matthew 	if (err) {
    767   9396   Matthew 		dmu_tx_abort(tx);
    768   9396   Matthew 		return (err);
    769   9396   Matthew 	}
    770   9396   Matthew 
    771   9396   Matthew 	mutex_enter(&zfsvfs->z_lock);
    772   9396   Matthew 	if (*objp == 0) {
    773   9396   Matthew 		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
    774   9396   Matthew 		    DMU_OT_NONE, 0, tx);
    775   9396   Matthew 		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
    776   9396   Matthew 		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
    777   9396   Matthew 	}
    778   9396   Matthew 	mutex_exit(&zfsvfs->z_lock);
    779   9396   Matthew 
    780   9396   Matthew 	if (quota == 0) {
    781   9396   Matthew 		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
    782   9396   Matthew 		if (err == ENOENT)
    783   9396   Matthew 			err = 0;
    784   9396   Matthew 	} else {
    785   9396   Matthew 		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
    786   9396   Matthew 	}
    787   9396   Matthew 	ASSERT(err == 0);
    788   9396   Matthew 	if (fuid_dirtied)
    789   9396   Matthew 		zfs_fuid_sync(zfsvfs, tx);
    790   9396   Matthew 	dmu_tx_commit(tx);
    791   9396   Matthew 	return (err);
    792   9396   Matthew }
    793   9396   Matthew 
    794   9396   Matthew boolean_t
    795   9396   Matthew zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
    796   9396   Matthew {
    797   9396   Matthew 	char buf[32];
    798   9396   Matthew 	uint64_t used, quota, usedobj, quotaobj;
    799   9396   Matthew 	int err;
    800   9396   Matthew 
    801   9396   Matthew 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
    802   9396   Matthew 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
    803   9396   Matthew 
    804   9396   Matthew 	if (quotaobj == 0 || zfsvfs->z_replay)
    805   9396   Matthew 		return (B_FALSE);
    806   9396   Matthew 
    807   9396   Matthew 	(void) sprintf(buf, "%llx", (longlong_t)fuid);
    808   9396   Matthew 	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
    809   9396   Matthew 	if (err != 0)
    810   9396   Matthew 		return (B_FALSE);
    811   9396   Matthew 
    812   9396   Matthew 	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
    813   9396   Matthew 	if (err != 0)
    814   9396   Matthew 		return (B_FALSE);
    815   9396   Matthew 	return (used >= quota);
    816   9396   Matthew }
    817   9396   Matthew 
    818   9396   Matthew int
    819  11185      Sean zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
    820   9396   Matthew {
    821   9396   Matthew 	objset_t *os;
    822   9396   Matthew 	zfsvfs_t *zfsvfs;
    823   9396   Matthew 	uint64_t zval;
    824   9396   Matthew 	int i, error;
    825   9396   Matthew 
    826  10298   Matthew 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
    827  10298   Matthew 
    828  10298   Matthew 	/*
    829  10298   Matthew 	 * We claim to always be readonly so we can open snapshots;
    830  10298   Matthew 	 * other ZPL code will prevent us from writing to snapshots.
    831  10298   Matthew 	 */
    832  10298   Matthew 	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
    833  10298   Matthew 	if (error) {
    834  10298   Matthew 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
    835   9396   Matthew 		return (error);
    836   9396   Matthew 	}
    837   9396   Matthew 
    838   9396   Matthew 	/*
    839   9396   Matthew 	 * Initialize the zfs-specific filesystem structure.
    840   9396   Matthew 	 * Should probably make this a kmem cache, shuffle fields,
    841   9396   Matthew 	 * and just bzero up to z_hold_mtx[].
    842   9396   Matthew 	 */
    843   9396   Matthew 	zfsvfs->z_vfs = NULL;
    844   9396   Matthew 	zfsvfs->z_parent = zfsvfs;
    845   9396   Matthew 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
    846   9396   Matthew 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
    847   9396   Matthew 	zfsvfs->z_os = os;
    848   9396   Matthew 
    849   9396   Matthew 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
    850   9396   Matthew 	if (error) {
    851   9396   Matthew 		goto out;
    852   9396   Matthew 	} else if (zfsvfs->z_version > ZPL_VERSION) {
    853   9396   Matthew 		(void) printf("Mismatched versions:  File system "
    854   9396   Matthew 		    "is version %llu on-disk format, which is "
    855   9396   Matthew 		    "incompatible with this software version %lld!",
    856   9396   Matthew 		    (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
    857   9396   Matthew 		error = ENOTSUP;
    858   9396   Matthew 		goto out;
    859   9396   Matthew 	}
    860   9396   Matthew 
    861   9396   Matthew 	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
    862   9396   Matthew 		goto out;
    863   9396   Matthew 	zfsvfs->z_norm = (int)zval;
    864   9396   Matthew 
    865   9396   Matthew 	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
    866   9396   Matthew 		goto out;
    867   9396   Matthew 	zfsvfs->z_utf8 = (zval != 0);
    868   9396   Matthew 
    869   9396   Matthew 	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
    870   9396   Matthew 		goto out;
    871   9396   Matthew 	zfsvfs->z_case = (uint_t)zval;
    872   9396   Matthew 
    873   9396   Matthew 	/*
    874   9396   Matthew 	 * Fold case on file systems that are always or sometimes case
    875   9396   Matthew 	 * insensitive.
    876   9396   Matthew 	 */
    877   9396   Matthew 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
    878   9396   Matthew 	    zfsvfs->z_case == ZFS_CASE_MIXED)
    879   9396   Matthew 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
    880   9396   Matthew 
    881   9396   Matthew 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
    882   9396   Matthew 
    883   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
    884   9396   Matthew 	    &zfsvfs->z_root);
    885   9396   Matthew 	if (error)
    886   9396   Matthew 		goto out;
    887   9396   Matthew 	ASSERT(zfsvfs->z_root != 0);
    888   9396   Matthew 
    889   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
    890   9396   Matthew 	    &zfsvfs->z_unlinkedobj);
    891   9396   Matthew 	if (error)
    892   9396   Matthew 		goto out;
    893   9396   Matthew 
    894   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ,
    895   9396   Matthew 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
    896   9396   Matthew 	    8, 1, &zfsvfs->z_userquota_obj);
    897   9396   Matthew 	if (error && error != ENOENT)
    898   9396   Matthew 		goto out;
    899   9396   Matthew 
    900   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ,
    901   9396   Matthew 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
    902   9396   Matthew 	    8, 1, &zfsvfs->z_groupquota_obj);
    903   9396   Matthew 	if (error && error != ENOENT)
    904   9396   Matthew 		goto out;
    905   9396   Matthew 
    906   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
    907   9396   Matthew 	    &zfsvfs->z_fuid_obj);
    908   9396   Matthew 	if (error && error != ENOENT)
    909   9396   Matthew 		goto out;
    910   9396   Matthew 
    911   9396   Matthew 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
    912   9396   Matthew 	    &zfsvfs->z_shares_dir);
    913   9396   Matthew 	if (error && error != ENOENT)
    914   9396   Matthew 		goto out;
    915   9396   Matthew 
    916   9396   Matthew 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
    917   9396   Matthew 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
    918   9396   Matthew 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
    919   9396   Matthew 	    offsetof(znode_t, z_link_node));
    920   9396   Matthew 	rrw_init(&zfsvfs->z_teardown_lock);
    921   9396   Matthew 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
    922   9396   Matthew 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
    923   9396   Matthew 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
    924   9396   Matthew 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
    925   9396   Matthew 
    926  11185      Sean 	*zfvp = zfsvfs;
    927   9396   Matthew 	return (0);
    928   9396   Matthew 
    929   9396   Matthew out:
    930  10298   Matthew 	dmu_objset_disown(os, zfsvfs);
    931  11185      Sean 	*zfvp = NULL;
    932   9396   Matthew 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
    933   9396   Matthew 	return (error);
    934   9396   Matthew }
    935   9396   Matthew 
    936   1544  eschrock static int
    937   5326  ek110237 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
    938   5326  ek110237 {
    939   5326  ek110237 	int error;
    940   5326  ek110237 
    941   5326  ek110237 	error = zfs_register_callbacks(zfsvfs->z_vfs);
    942   5326  ek110237 	if (error)
    943   5326  ek110237 		return (error);
    944   5326  ek110237 
    945   5326  ek110237 	/*
    946   5326  ek110237 	 * Set the objset user_ptr to track its zfsvfs.
    947   5326  ek110237 	 */
    948  10298   Matthew 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
    949   5326  ek110237 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
    950  10298   Matthew 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
    951   5326  ek110237 
    952   9292      Neil 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
    953   9292      Neil 	if (zil_disable) {
    954  10685    George 		zil_destroy(zfsvfs->z_log, B_FALSE);
    955   9292      Neil 		zfsvfs->z_log = NULL;
    956   9292      Neil 	}
    957   9292      Neil 
    958   5326  ek110237 	/*
    959   5326  ek110237 	 * If we are not mounting (ie: online recv), then we don't
    960   5326  ek110237 	 * have to worry about replaying the log as we blocked all
    961   5326  ek110237 	 * operations out since we closed the ZIL.
    962   5326  ek110237 	 */
    963   5326  ek110237 	if (mounting) {
    964   7638      Neil 		boolean_t readonly;
    965   7638      Neil 
    966   5326  ek110237 		/*
    967   5326  ek110237 		 * During replay we remove the read only flag to
    968   5326  ek110237 		 * allow replays to succeed.
    969   5326  ek110237 		 */
    970   5326  ek110237 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
    971   8227      Neil 		if (readonly != 0)
    972   8227      Neil 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
    973   8227      Neil 		else
    974   8227      Neil 			zfs_unlinked_drain(zfsvfs);
    975   5326  ek110237 
    976   9292      Neil 		if (zfsvfs->z_log) {
    977   8227      Neil 			/*
    978   8227      Neil 			 * Parse and replay the intent log.
    979   8227      Neil 			 *
    980   8227      Neil 			 * Because of ziltest, this must be done after
    981   8227      Neil 			 * zfs_unlinked_drain().  (Further note: ziltest
    982   8227      Neil 			 * doesn't use readonly mounts, where
    983   8227      Neil 			 * zfs_unlinked_drain() isn't called.)  This is because
    984   8227      Neil 			 * ziltest causes spa_sync() to think it's committed,
    985   8227      Neil 			 * but actually it is not, so the intent log contains
    986   8227      Neil 			 * many txg's worth of changes.
    987   8227      Neil 			 *
    988   8227      Neil 			 * In particular, if object N is in the unlinked set in
    989   8227      Neil 			 * the last txg to actually sync, then it could be
    990   8227      Neil 			 * actually freed in a later txg and then reallocated
    991   8227      Neil 			 * in a yet later txg.  This would write a "create
    992   8227      Neil 			 * object N" record to the intent log.  Normally, this
    993   8227      Neil 			 * would be fine because the spa_sync() would have
    994   8227      Neil 			 * written out the fact that object N is free, before
    995   8227      Neil 			 * we could write the "create object N" intent log
    996   8227      Neil 			 * record.
    997   8227      Neil 			 *
    998   8227      Neil 			 * But when we are in ziltest mode, we advance the "open
    999   8227      Neil 			 * txg" without actually spa_sync()-ing the changes to
   1000   8227      Neil 			 * disk.  So we would see that object N is still
   1001   8227      Neil 			 * allocated and in the unlinked set, and there is an
   1002   8227      Neil 			 * intent log record saying to allocate it.
   1003   8227      Neil 			 */
   1004   8227      Neil 			zfsvfs->z_replay = B_TRUE;
   1005   8227      Neil 			zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
   1006   8227      Neil 			zfsvfs->z_replay = B_FALSE;
   1007   8227      Neil 		}
   1008   5326  ek110237 		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
   1009   5326  ek110237 	}
   1010   5326  ek110237 
   1011   5326  ek110237 	return (0);
   1012   5326  ek110237 }
   1013   5326  ek110237 
   1014   9396   Matthew void
   1015   9396   Matthew zfsvfs_free(zfsvfs_t *zfsvfs)
   1016   6083  ek110237 {
   1017   9396   Matthew 	int i;
   1018   9788       Tom 	extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
   1019   9788       Tom 
   1020   9788       Tom 	/*
   1021   9788       Tom 	 * This is a barrier to prevent the filesystem from going away in
   1022   9788       Tom 	 * zfs_znode_move() until we can safely ensure that the filesystem is
   1023   9788       Tom 	 * not unmounted. We consider the filesystem valid before the barrier
   1024   9788       Tom 	 * and invalid after the barrier.
   1025   9788       Tom 	 */
   1026   9788       Tom 	rw_enter(&zfsvfs_lock, RW_READER);
   1027   9788       Tom 	rw_exit(&zfsvfs_lock);
   1028   9396   Matthew 
   1029   9396   Matthew 	zfs_fuid_destroy(zfsvfs);
   1030   9396   Matthew 
   1031   6083  ek110237 	mutex_destroy(&zfsvfs->z_znodes_lock);
   1032   9030      Mark 	mutex_destroy(&zfsvfs->z_lock);
   1033   6083  ek110237 	list_destroy(&zfsvfs->z_all_znodes);
   1034   6083  ek110237 	rrw_destroy(&zfsvfs->z_teardown_lock);
   1035   6083  ek110237 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
   1036   6083  ek110237 	rw_destroy(&zfsvfs->z_fuid_lock);
   1037   9396   Matthew 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
   1038   9396   Matthew 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
   1039   6083  ek110237 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
   1040   9396   Matthew }
   1041   9396   Matthew 
   1042   9396   Matthew static void
   1043   9396   Matthew zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
   1044   9396   Matthew {
   1045   9396   Matthew 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
   1046   9396   Matthew 	if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
   1047   9396   Matthew 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
   1048   9396   Matthew 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
   1049   9396   Matthew 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
   1050   9396   Matthew 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
   1051   9749       Tim 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
   1052  10793       dai 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
   1053   9396   Matthew 	}
   1054   6083  ek110237 }
   1055   6083  ek110237 
   1056   5326  ek110237 static int
   1057   7046    ahrens zfs_domount(vfs_t *vfsp, char *osname)
   1058   1544  eschrock {
   1059   1544  eschrock 	dev_t mount_dev;
   1060   9396   Matthew 	uint64_t recordsize, fsid_guid;
   1061   1544  eschrock 	int error = 0;
   1062   1544  eschrock 	zfsvfs_t *zfsvfs;
   1063   1544  eschrock 
   1064   1544  eschrock 	ASSERT(vfsp);
   1065   1544  eschrock 	ASSERT(osname);
   1066   1544  eschrock 
   1067  10298   Matthew 	error = zfsvfs_create(osname, &zfsvfs);
   1068   9396   Matthew 	if (error)
   1069   9396   Matthew 		return (error);
   1070   1544  eschrock 	zfsvfs->z_vfs = vfsp;
   1071   1544  eschrock 
   1072   1544  eschrock 	/* Initialize the generic filesystem structure. */
   1073   1544  eschrock 	vfsp->vfs_bcount = 0;
   1074   1544  eschrock 	vfsp->vfs_data = NULL;
   1075   1544  eschrock 
   1076   1544  eschrock 	if (zfs_create_unique_device(&mount_dev) == -1) {
   1077   1544  eschrock 		error = ENODEV;
   1078   1544  eschrock 		goto out;
   1079   1544  eschrock 	}
   1080   1544  eschrock 	ASSERT(vfs_devismounted(mount_dev) == 0);
   1081   1544  eschrock 
   1082   1544  eschrock 	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
   1083   1544  eschrock 	    NULL))
   1084   1544  eschrock 		goto out;
   1085   1544  eschrock 
   1086   1544  eschrock 	vfsp->vfs_dev = mount_dev;
   1087   1544  eschrock 	vfsp->vfs_fstype = zfsfstype;
   1088   1544  eschrock 	vfsp->vfs_bsize = recordsize;
   1089   1544  eschrock 	vfsp->vfs_flag |= VFS_NOTRUNC;
   1090   1544  eschrock 	vfsp->vfs_data = zfsvfs;
   1091   1544  eschrock 
   1092   9396   Matthew 	/*
   1093   9396   Matthew 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
   1094   9396   Matthew 	 * separates our fsid from any other filesystem types, and a
   1095   9396   Matthew 	 * 56-bit objset unique ID.  The objset unique ID is unique to
   1096   9396   Matthew 	 * all objsets open on this system, provided by unique_create().
   1097   9396   Matthew 	 * The 8-bit fs type must be put in the low bits of fsid[1]
   1098   9396   Matthew 	 * because that's where other Solaris filesystems put it.
   1099   9396   Matthew 	 */
   1100   9396   Matthew 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
   1101   9396   Matthew 	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
   1102   9396   Matthew 	vfsp->vfs_fsid.val[0] = fsid_guid;
   1103   9396   Matthew 	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
   1104   9396   Matthew 	    zfsfstype & 0xFF;
   1105   1544  eschrock 
   1106   5331       amw 	/*
   1107   5331       amw 	 * Set features for file system.
   1108   5331       amw 	 */
   1109   9396   Matthew 	zfs_set_fuid_feature(zfsvfs);
   1110   5498      timh 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
   1111   5498      timh 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
   1112   5498      timh 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
   1113   5498      timh 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
   1114   5498      timh 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
   1115   5498      timh 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
   1116   5498      timh 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
   1117   5498      timh 	}
   1118   5331       amw 
   1119   1544  eschrock 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
   1120   5331       amw 		uint64_t pval;
   1121   3234  ck153898 
   1122   1544  eschrock 		atime_changed_cb(zfsvfs, B_FALSE);
   1123   1544  eschrock 		readonly_changed_cb(zfsvfs, B_TRUE);
   1124   5331       amw 		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
   1125   3234  ck153898 			goto out;
   1126   5331       amw 		xattr_changed_cb(zfsvfs, pval);
   1127   1544  eschrock 		zfsvfs->z_issnap = B_TRUE;
   1128   9688   Matthew 
   1129  10298   Matthew 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
   1130   9688   Matthew 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
   1131  10298   Matthew 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
   1132   1544  eschrock 	} else {
   1133   5326  ek110237 		error = zfsvfs_setup(zfsvfs, B_TRUE);
   1134   1544  eschrock 	}
   1135   1544  eschrock 
   1136   1544  eschrock 	if (!zfsvfs->z_issnap)
   1137   1544  eschrock 		zfsctl_create(zfsvfs);
   1138   1544  eschrock out:
   1139   1544  eschrock 	if (error) {
   1140  10298   Matthew 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
   1141   9396   Matthew 		zfsvfs_free(zfsvfs);
   1142   1544  eschrock 	} else {
   1143   1544  eschrock 		atomic_add_32(&zfs_active_fs_count, 1);
   1144   1544  eschrock 	}
   1145   1544  eschrock 
   1146   1544  eschrock 	return (error);
   1147   1544  eschrock }
   1148   1544  eschrock 
   1149   1544  eschrock void
   1150   1544  eschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
   1151   1544  eschrock {
   1152   1544  eschrock 	objset_t *os = zfsvfs->z_os;
   1153   1544  eschrock 	struct dsl_dataset *ds;
   1154   1544  eschrock 
   1155   1544  eschrock 	/*
   1156   1544  eschrock 	 * Unregister properties.
   1157   1544  eschrock 	 */
   1158   1544  eschrock 	if (!dmu_objset_is_snapshot(os)) {
   1159   1544  eschrock 		ds = dmu_objset_ds(os);
   1160   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
   1161   3234  ck153898 		    zfsvfs) == 0);
   1162   3234  ck153898 
   1163   3234  ck153898 		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
   1164   1544  eschrock 		    zfsvfs) == 0);
   1165   1544  eschrock 
   1166   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
   1167   1544  eschrock 		    zfsvfs) == 0);
   1168   1544  eschrock 
   1169   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
   1170   1544  eschrock 		    zfsvfs) == 0);
   1171   1544  eschrock 
   1172   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
   1173   1544  eschrock 		    zfsvfs) == 0);
   1174   1544  eschrock 
   1175   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
   1176   1544  eschrock 		    zfsvfs) == 0);
   1177   1544  eschrock 
   1178   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
   1179   1544  eschrock 		    zfsvfs) == 0);
   1180   1544  eschrock 
   1181   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
   1182   1544  eschrock 		    zfsvfs) == 0);
   1183   1544  eschrock 
   1184   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
   1185   1544  eschrock 		    zfsvfs) == 0);
   1186   1544  eschrock 
   1187   1544  eschrock 		VERIFY(dsl_prop_unregister(ds, "aclinherit",
   1188   1544  eschrock 		    acl_inherit_changed_cb, zfsvfs) == 0);
   1189   5331       amw 
   1190   5331       amw 		VERIFY(dsl_prop_unregister(ds, "vscan",
   1191   5331       amw 		    vscan_changed_cb, zfsvfs) == 0);
   1192   1544  eschrock 	}
   1193   1544  eschrock }
   1194   1544  eschrock 
   1195   3912     lling /*
   1196   3912     lling  * Convert a decimal digit string to a uint64_t integer.
   1197   3912     lling  */
   1198   3912     lling static int
   1199   3912     lling str_to_uint64(char *str, uint64_t *objnum)
   1200   3912     lling {
   1201   3912     lling 	uint64_t num = 0;
   1202   3912     lling 
   1203   3912     lling 	while (*str) {
   1204   3912     lling 		if (*str < '0' || *str > '9')
   1205   3912     lling 			return (EINVAL);
   1206   3912     lling 
   1207   3912     lling 		num = num*10 + *str++ - '0';
   1208   3912     lling 	}
   1209   3912     lling 
   1210   3912     lling 	*objnum = num;
   1211   3912     lling 	return (0);
   1212   3912     lling }
   1213   3912     lling 
   1214   3912     lling /*
   1215   3912     lling  * The boot path passed from the boot loader is in the form of
   1216   3912     lling  * "rootpool-name/root-filesystem-object-number'. Convert this
   1217   3912     lling  * string to a dataset name: "rootpool-name/root-filesystem-name".
   1218   3912     lling  */
   1219   3912     lling static int
   1220   6423   gw25295 zfs_parse_bootfs(char *bpath, char *outpath)
   1221   3912     lling {
   1222   3912     lling 	char *slashp;
   1223   3912     lling 	uint64_t objnum;
   1224   3912     lling 	int error;
   1225   3912     lling 
   1226   3912     lling 	if (*bpath == 0 || *bpath == '/')
   1227   3912     lling 		return (EINVAL);
   1228   3912     lling 
   1229   7656    Sherry 	(void) strcpy(outpath, bpath);
   1230   7656    Sherry 
   1231   3912     lling 	slashp = strchr(bpath, '/');
   1232   3912     lling 
   1233   3912     lling 	/* if no '/', just return the pool name */
   1234   3912     lling 	if (slashp == NULL) {
   1235   3912     lling 		return (0);
   1236   3912     lling 	}
   1237   3912     lling 
   1238   7656    Sherry 	/* if not a number, just return the root dataset name */
   1239   7656    Sherry 	if (str_to_uint64(slashp+1, &objnum)) {
   1240   7656    Sherry 		return (0);
   1241   7656    Sherry 	}
   1242   3912     lling 
   1243   3912     lling 	*slashp = '\0';
   1244   3912     lling 	error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
   1245   3912     lling 	*slashp = '/';
   1246   3912     lling 
   1247   3912     lling 	return (error);
   1248   3912     lling }
   1249   3912     lling 
   1250  10972       Ric /*
   1251  10972       Ric  * zfs_check_global_label:
   1252  10972       Ric  *	Check that the hex label string is appropriate for the dataset
   1253  10972       Ric  *	being mounted into the global_zone proper.
   1254  10972       Ric  *
   1255  10972       Ric  *	Return an error if the hex label string is not default or
   1256  10972       Ric  *	admin_low/admin_high.  For admin_low labels, the corresponding
   1257  10972       Ric  *	dataset must be readonly.
   1258  10972       Ric  */
   1259  10972       Ric int
   1260  10972       Ric zfs_check_global_label(const char *dsname, const char *hexsl)
   1261  10972       Ric {
   1262  10972       Ric 	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
   1263  10972       Ric 		return (0);
   1264  10972       Ric 	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
   1265  10972       Ric 		return (0);
   1266  10972       Ric 	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
   1267  10972       Ric 		/* must be readonly */
   1268  10972       Ric 		uint64_t rdonly;
   1269  10972       Ric 
   1270  10972       Ric 		if (dsl_prop_get_integer(dsname,
   1271  10972       Ric 		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
   1272  10972       Ric 			return (EACCES);
   1273  10972       Ric 		return (rdonly ? 0 : EACCES);
   1274  10972       Ric 	}
   1275  10972       Ric 	return (EACCES);
   1276  10972       Ric }
   1277  10972       Ric 
   1278  10972       Ric /*
   1279  10972       Ric  * zfs_mount_label_policy:
   1280  10972       Ric  *	Determine whether the mount is allowed according to MAC check.
   1281  10972       Ric  *	by comparing (where appropriate) label of the dataset against
   1282  10972       Ric  *	the label of the zone being mounted into.  If the dataset has
   1283  10972       Ric  *	no label, create one.
   1284  10972       Ric  *
   1285  10972       Ric  *	Returns:
   1286  10972       Ric  *		 0 :	access allowed
   1287  10972       Ric  *		>0 :	error code, such as EACCES
   1288  10972       Ric  */
   1289  10972       Ric static int
   1290  10972       Ric zfs_mount_label_policy(vfs_t *vfsp, char *osname)
   1291  10972       Ric {
   1292  10972       Ric 	int		error, retv;
   1293  10972       Ric 	zone_t		*mntzone = NULL;
   1294  10972       Ric 	ts_label_t	*mnt_tsl;
   1295  10972       Ric 	bslabel_t	*mnt_sl;
   1296  10972       Ric 	bslabel_t	ds_sl;
   1297  10972       Ric 	char		ds_hexsl[MAXNAMELEN];
   1298  10972       Ric 
   1299  10972       Ric 	retv = EACCES;				/* assume the worst */
   1300  10972       Ric 
   1301  10972       Ric 	/*
   1302  10972       Ric 	 * Start by getting the dataset label if it exists.
   1303  10972       Ric 	 */
   1304  10972       Ric 	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
   1305  10972       Ric 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
   1306  10972       Ric 	if (error)
   1307  10972       Ric 		return (EACCES);
   1308  10972       Ric 
   1309  10972       Ric 	/*
   1310  10972       Ric 	 * If labeling is NOT enabled, then disallow the mount of datasets
   1311  10972       Ric 	 * which have a non-default label already.  No other label checks
   1312  10972       Ric 	 * are needed.
   1313  10972       Ric 	 */
   1314  10972       Ric 	if (!is_system_labeled()) {
   1315  10972       Ric 		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
   1316  10972       Ric 			return (0);
   1317  10972       Ric 		return (EACCES);
   1318  10972       Ric 	}
   1319  10972       Ric 
   1320  10972       Ric 	/*
   1321  10972       Ric 	 * Get the label of the mountpoint.  If mounting into the global
   1322  10972       Ric 	 * zone (i.e. mountpoint is not within an active zone and the
   1323  10972       Ric 	 * zoned property is off), the label must be default or
   1324  10972       Ric 	 * admin_low/admin_high only; no other checks are needed.
   1325  10972       Ric 	 */
   1326  10972       Ric 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
   1327  10972       Ric 	if (mntzone->zone_id == GLOBAL_ZONEID) {
   1328  10972       Ric 		uint64_t zoned;
   1329  10972       Ric 
   1330  10972       Ric 		zone_rele(mntzone);
   1331  10972       Ric 
   1332  10972       Ric 		if (dsl_prop_get_integer(osname,
   1333  10972       Ric 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
   1334  10972       Ric 			return (EACCES);
   1335  10972       Ric 		if (!zoned)
   1336  10972       Ric 			return (zfs_check_global_label(osname, ds_hexsl));
   1337  10972       Ric 		else
   1338  10972       Ric 			/*
   1339  10972       Ric 			 * This is the case of a zone dataset being mounted
   1340  10972       Ric 			 * initially, before the zone has been fully created;
   1341  10972       Ric 			 * allow this mount into global zone.
   1342  10972       Ric 			 */
   1343  10972       Ric 			return (0);
   1344  10972       Ric 	}
   1345  10972       Ric 
   1346  10972       Ric 	mnt_tsl = mntzone->zone_slabel;
   1347  10972       Ric 	ASSERT(mnt_tsl != NULL);
   1348  10972       Ric 	label_hold(mnt_tsl);
   1349  10972       Ric 	mnt_sl = label2bslabel(mnt_tsl);
   1350  10972       Ric 
   1351  10972       Ric 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
   1352  10972       Ric 		/*
   1353  10972       Ric 		 * The dataset doesn't have a real label, so fabricate one.
   1354  10972       Ric 		 */
   1355  10972       Ric 		char *str = NULL;
   1356  10972       Ric 
   1357  10972       Ric 		if (l_to_str_internal(mnt_sl, &str) == 0 &&
   1358  10972       Ric 		    dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
   1359  11022       Tom 		    ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
   1360  10972       Ric 			retv = 0;
   1361  10972       Ric 		if (str != NULL)
   1362  10972       Ric 			kmem_free(str, strlen(str) + 1);
   1363  10972       Ric 	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
   1364  10972       Ric 		/*
   1365  10972       Ric 		 * Now compare labels to complete the MAC check.  If the
   1366  10972       Ric 		 * labels are equal then allow access.  If the mountpoint
   1367  10972       Ric 		 * label dominates the dataset label, allow readonly access.
   1368  10972       Ric 		 * Otherwise, access is denied.
   1369  10972       Ric 		 */
   1370  10972       Ric 		if (blequal(mnt_sl, &ds_sl))
   1371  10972       Ric 			retv = 0;
   1372  10972       Ric 		else if (bldominates(mnt_sl, &ds_sl)) {
   1373  10972       Ric 			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
   1374  10972       Ric 			retv = 0;
   1375  10972       Ric 		}
   1376  10972       Ric 	}
   1377  10972       Ric 
   1378  10972       Ric 	label_rele(mnt_tsl);
   1379  10972       Ric 	zone_rele(mntzone);
   1380  10972       Ric 	return (retv);
   1381  10972       Ric }
   1382  10972       Ric 
/*
 * Handle a root file system request for ZFS; 'why' selects the operation:
 *
 *	ROOT_INIT	Import the root pool, turn the "zfs-bootfs" boot
 *			property into a dataset name, mount that dataset
 *			on 'vfsp' and publish its root vnode as rootvp.
 *			Only the first ROOT_INIT call proceeds; any later
 *			one fails with EBUSY.
 *	ROOT_REMOUNT	Invoke readonly_changed_cb() with B_FALSE, flag the
 *			vfs as VFS_REMOUNT and re-register the property
 *			callbacks.
 *	ROOT_UNMOUNT	Unregister the property callbacks and sync.
 *
 * Returns 0 on success, the error from the failing step otherwise, and
 * ENOTSUP for any other value of 'why'.
 */
static int
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
{
	int error = 0;
	/* counts ROOT_INIT attempts; non-zero means one was already made */
	static int zfsrootdone = 0;
	zfsvfs_t *zfsvfs = NULL;
	znode_t *zp = NULL;
	vnode_t *vp = NULL;
	char *zfs_bootfs;
	char *zfs_devid;

	ASSERT(vfsp);

	/*
	 * The filesystem that we mount as root is defined in the
	 * boot property "zfs-bootfs" with a format of
	 * "poolname/root-dataset-objnum".
	 */
	if (why == ROOT_INIT) {
		if (zfsrootdone++)
			return (EBUSY);
		/*
		 * the process of doing a spa_load will require the
		 * clock to be set before we could (for example) do
		 * something better by looking at the timestamp on
		 * an uberblock, so just set it to -1.
		 */
		clkset(-1);

		/*
		 * NOTE(review): the message below names "spa_get_bootfs"
		 * although the call that failed is spa_get_bootprop().
		 */
		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
			    "bootfs name");
			return (EINVAL);
		}
		/*
		 * "diskdevid" is optional: a NULL result is passed straight
		 * through to spa_import_rootpool().
		 *
		 * NOTE(review): rootfs.bo_name is handed to
		 * spa_import_rootpool() here and only overwritten by
		 * zfs_parse_bootfs() below - presumably it still holds the
		 * boot device path at this point; confirm.
		 */
		zfs_devid = spa_get_bootprop("diskdevid");
		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
		if (zfs_devid)
			spa_free_bootprop(zfs_devid);
		if (error) {
			spa_free_bootprop(zfs_bootfs);
			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
			    error);
			return (error);
		}
		/* Rewrite rootfs.bo_name as the root dataset's name. */
		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
			spa_free_bootprop(zfs_bootfs);
			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
			    error);
			return (error);
		}

		spa_free_bootprop(zfs_bootfs);

		/* From here on every exit must go through 'out'. */
		if (error = vfs_lock(vfsp))
			return (error);

		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
			goto out;
		}

		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
		ASSERT(zfsvfs);
		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
			goto out;
		}

		/* Mark the dataset's root vnode as a root and publish it. */
		vp = ZTOV(zp);
		mutex_enter(&vp->v_lock);
		vp->v_flag |= VROOT;
		mutex_exit(&vp->v_lock);
		rootvp = vp;

		/*
		 * Leave rootvp held.  The root file system is never unmounted.
		 */

		vfs_add((struct vnode *)0, vfsp,
		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
out:
		vfs_unlock(vfsp);
		return (error);
	} else if (why == ROOT_REMOUNT) {
		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
		vfsp->vfs_flag |= VFS_REMOUNT;

		/* refresh mount options */
		zfs_unregister_callbacks(vfsp->vfs_data);
		return (zfs_register_callbacks(vfsp));

	} else if (why == ROOT_UNMOUNT) {
		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
		/* best effort: the result of the sync is ignored */
		(void) zfs_sync(vfsp, 0, 0);
		return (0);
	}

	/*
	 * if "why" is equal to anything else other than ROOT_INIT,
	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
	 */
	return (ENOTSUP);
}
   1486   1544  eschrock 
/*
 * Mount the dataset named by uap->spec on the directory vnode 'mvp', or,
 * when MS_REMOUNT is set, refresh the mount's property callbacks.
 *
 * The checks are made in this order:
 *	1. 'mvp' must be a directory (ENOTDIR).
 *	2. Unless remounting or overlaying, 'mvp' must have exactly one
 *	   reference and must not be a root vnode (EBUSY).
 *	3. Unparsed MS_DATA mount data is rejected (EINVAL).
 *	4. The caller needs the mount privilege or, failing that, the
 *	   delegated "mount" permission on the dataset together with
 *	   ownership of, or write access to, the mount point (EPERM).
 *	5. In a non-global zone the dataset must be visible and writable
 *	   (EPERM).
 *	6. The label policy in zfs_mount_label_policy() must allow it.
 *
 * Returns 0 on success or the error from the first failing step.
 */
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char		*osname;
	pathname_t	spn;
	int		error = 0;
	/* where uap->spec lives, as selected by MS_SYSSPACE */
	uio_seg_t	fromspace = (uap->flags & MS_SYSSPACE) ?
	    UIO_SYSSPACE : UIO_USERSPACE;
	int		canwrite;

	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * A plain mount (neither remount nor overlay) requires that nobody
	 * else holds the mount point and that it is not a root vnode.
	 */
	mutex_enter(&mvp->v_lock);
	if ((uap->flags & MS_REMOUNT) == 0 &&
	    (uap->flags & MS_OVERLAY) == 0 &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * ZFS does not support passing unparsed data in via MS_DATA.
	 * Users should use the MS_OPTIONSTR interface; this means
	 * that all option parsing is already done and the options struct
	 * can be interrogated.
	 */
	if ((uap->flags & MS_DATA) && uap->datalen > 0)
		return (EINVAL);

	/*
	 * Get the objset name (the "special" mount argument).
	 * Once pn_get() succeeds, every exit must go through 'out' so that
	 * 'spn' is freed.
	 */
	if (error = pn_get(uap->spec, fromspace, &spn))
		return (error);

	osname = spn.pn_path;

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
		if (error == 0) {
			vattr_t		vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
				goto out;
			}

			/* Neither owner nor able to write: refuse. */
			if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
				error = EPERM;
				goto out;
			}

			secpolicy_fs_mount_clearopts(cr, vfsp);
		} else {
			goto out;
		}
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.  A dataset that is visible but not
	 * writable ('canwrite' cleared by zone_dataset_visible()) is
	 * refused as well.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = EPERM;
		goto out;
	}

	/* Label (MAC) check; may also force the mount read-only. */
	error = zfs_mount_label_policy(vfsp, osname);
	if (error)
		goto out;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (uap->flags & MS_REMOUNT) {
		/* refresh mount options */
		zfs_unregister_callbacks(vfsp->vfs_data);
		error = zfs_register_callbacks(vfsp);
		goto out;
	}

	error = zfs_domount(vfsp, osname);

	/*
	 * Add an extra VFS_HOLD on our parent vfs so that it can't
	 * disappear due to a forced unmount.
	 * This is only done for snapshot mounts (z_issnap); the hold is
	 * taken on the vfs that contains the mount point.
	 */
	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
		VFS_HOLD(mvp->v_vfsp);

out:
	pn_free(&spn);
	return (error);
}
   1600    789    ahrens 
   1601    789    ahrens static int
   1602    789    ahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
   1603    789    ahrens {
   1604    789    ahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1605    789    ahrens 	dev32_t d32;
   1606   2885    ahrens 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
   1607    789    ahrens 
   1608    789    ahrens 	ZFS_ENTER(zfsvfs);
   1609    789    ahrens 
   1610   2885    ahrens 	dmu_objset_space(zfsvfs->z_os,
   1611   2885    ahrens 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
   1612    789    ahrens 
   1613    789    ahrens 	/*
   1614    789    ahrens 	 * The underlying storage pool actually uses multiple block sizes.
   1615    789    ahrens 	 * We report the fragsize as the smallest block size we support,
   1616    789    ahrens 	 * and we report our blocksize as the filesystem's maximum blocksize.
   1617    789    ahrens 	 */
   1618    789    ahrens 	statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
   1619    789    ahrens 	statp->f_bsize = zfsvfs->z_max_blksz;
   1620    789    ahrens 
   1621    789    ahrens 	/*
   1622    789    ahrens 	 * The following report "total" blocks of various kinds in the
   1623    789    ahrens 	 * file system, but reported in terms of f_frsize - the
   1624    789    ahrens 	 * "fragment" size.
   1625    789    ahrens 	 */
   1626    789    ahrens 
   1627   2885    ahrens 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
   1628   2885    ahrens 	statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
   1629    789    ahrens 	statp->f_bavail = statp->f_bfree; /* no root reservation */
   1630    789    ahrens 
   1631    789    ahrens 	/*
   1632    789    ahrens 	 * statvfs() should really be called statufs(), because it assumes
   1633    789    ahrens 	 * static metadata.  ZFS doesn't preallocate files, so the best
   1634    789    ahrens 	 * we can do is report the max that could possibly fit in f_files,
   1635    789    ahrens 	 * and that minus the number actually used in f_ffree.
   1636    789    ahrens 	 * For f_ffree, report the smaller of the number of object available
   1637    789    ahrens 	 * and the number of blocks (each object will take at least a block).
   1638    789    ahrens 	 */
   1639   2885    ahrens 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
   1640    789    ahrens 	statp->f_favail = statp->f_ffree;	/* no "root reservation" */
   1641   2885    ahrens 	statp->f_files = statp->f_ffree + usedobjs;
   1642    789    ahrens 
   1643    789    ahrens 	(void) cmpldev(&d32, vfsp->vfs_dev);
   1644    789    ahrens 	statp->f_fsid = d32;
   1645    789    ahrens 
   1646    789    ahrens 	/*
   1647    789    ahrens 	 * We're a zfs filesystem.
   1648    789    ahrens 	 */
   1649    789    ahrens 	(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
   1650    789    ahrens 
   1651   1123     marks 	statp->f_flag = vf_to_stf(vfsp->vfs_flag);
   1652    789    ahrens 
   1653    789    ahrens 	statp->f_namemax = ZFS_MAXNAMELEN;
   1654    789    ahrens 
   1655    789    ahrens 	/*
   1656    789    ahrens 	 * We have all of 32 characters to stuff a string here.
   1657    789    ahrens 	 * Is there anything useful we could/should provide?
   1658    789    ahrens 	 */
   1659    789    ahrens 	bzero(statp->f_fstr, sizeof (statp->f_fstr));
   1660    789    ahrens 
   1661    789    ahrens 	ZFS_EXIT(zfsvfs);
   1662    789    ahrens 	return (0);
   1663    789    ahrens }
   1664    789    ahrens 
   1665    789    ahrens static int
   1666    789    ahrens zfs_root(vfs_t *vfsp, vnode_t **vpp)
   1667    789    ahrens {
   1668    789    ahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1669    789    ahrens 	znode_t *rootzp;
   1670    789    ahrens 	int error;
   1671    789    ahrens 
   1672    789    ahrens 	ZFS_ENTER(zfsvfs);
   1673    789    ahrens 
   1674    789    ahrens 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
   1675    789    ahrens 	if (error == 0)
   1676    789    ahrens 		*vpp = ZTOV(rootzp);
   1677    789    ahrens 
   1678    789    ahrens 	ZFS_EXIT(zfsvfs);
   1679    789    ahrens 	return (error);
   1680    789    ahrens }
   1681    789    ahrens 
/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 *
 * Both locks are dropped before returning when 'unmounting' is TRUE, and
 * also on the EIO path below.
 *
 * Returns EIO when not unmounting and the file system has already been
 * unmounted or has no objset (z_os == NULL); returns 0 otherwise.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 * This is why it happens before z_teardown_inactive_lock is taken
	 * just below.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count > 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 * (Only reachable when unmounting: the !unmounting case with a
	 * NULL z_os already returned EIO above.)
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 * If the first eviction pass reports a non-zero result, wait for
	 * the pool to sync and try once more; the result of the second
	 * pass is ignored.
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}
   1777   5326  ek110237 
/*
 * Unmount the ZFS file system mounted on 'vfsp'.
 *
 * The caller needs the unmount privilege or, failing that, the delegated
 * "mount" permission on the mounted dataset.  Snapshots mounted under
 * .zfs are unmounted first.  Unless MS_FORCE is set in 'fflag', the
 * unmount is refused with EBUSY while references remain.
 *
 * Returns 0 on success, otherwise the error from the permission check,
 * from zfsctl_umount_snapshots(), or EBUSY.  Once VFS_UNMOUNTED has been
 * set the function no longer returns an error.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	int ret;

	/*
	 * Privilege check; fall back to the delegated mount permission on
	 * the dataset named by the vfs resource.
	 */
	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL &&
	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
		}
	}

	vfsp->vfs_flag |= VFS_UNMOUNTED;

	/* With unmounting == B_TRUE the teardown is expected to return 0. */
	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);

	return (0);
}
   1863    789    ahrens 
   1864    789    ahrens static int
   1865    789    ahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
   1866    789    ahrens {
   1867    789    ahrens 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
   1868    789    ahrens 	znode_t		*zp;
   1869    789    ahrens 	uint64_t	object = 0;
   1870    789    ahrens 	uint64_t	fid_gen = 0;
   1871    789    ahrens 	uint64_t	gen_mask;
   1872    789    ahrens 	uint64_t	zp_gen;
   1873    789    ahrens 	int 		i, err;
   1874    789    ahrens 
   1875    789    ahrens 	*vpp = NULL;
   1876    789    ahrens 
   1877    789    ahrens 	ZFS_ENTER(zfsvfs);
   1878    789    ahrens 
   1879    789    ahrens 	if (fidp->fid_len == LONG_FID_LEN) {
   1880    789    ahrens 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
   1881    789    ahrens 		uint64_t	objsetid = 0;
   1882    789    ahrens 		uint64_t	setgen = 0;
   1883    789    ahrens 
   1884    789    ahrens 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
   1885    789    ahrens 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
   1886    789    ahrens 
   1887    789    ahrens 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
   1888    789    ahrens 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
   1889    789    ahrens 
   1890    789    ahrens 		ZFS_EXIT(zfsvfs);
   1891    789    ahrens 
   1892    789    ahrens 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
   1893    789    ahrens 		if (err)
   1894    789    ahrens 			return (EINVAL);
   1895    789    ahrens 		ZFS_ENTER(zfsvfs);
   1896    789    ahrens 	}
   1897    789    ahrens 
   1898    789    ahrens 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
   1899    789    ahrens 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
   1900    789    ahrens 
   1901    789    ahrens 		for (i = 0; i < sizeof (zfid->zf_object); i++)
   1902    789    ahrens 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
   1903    789    ahrens 
   1904    789    ahrens 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
   1905    789    ahrens 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
   1906    789    ahrens 	} else {
   1907    789    ahrens 		ZFS_EXIT(zfsvfs);
   1908    789    ahrens 		return (EINVAL);
   1909    789    ahrens 	}
   1910    789    ahrens 
   1911    789    ahrens 	/* A zero fid_gen means we are in the .zfs control directories */
   1912    789    ahrens 	if (fid_gen == 0 &&
   1913    789    ahrens 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
   1914    789    ahrens 		*vpp = zfsvfs->z_ctldir;
   1915    789    ahrens 		ASSERT(*vpp != NULL);
   1916    789    ahrens 		if (object == ZFSCTL_INO_SNAPDIR) {
   1917    789    ahrens 			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
   1918   5331       amw 			    0, NULL, NULL, NULL, NULL, NULL) == 0);
   1919    789    ahrens 		} else {
   1920    789    ahrens 			VN_HOLD(*vpp);
   1921    789    ahrens 		}
   1922    789    ahrens 		ZFS_EXIT(zfsvfs);
   1923    789    ahrens 		return (0);
   1924    789    ahrens 	}
   1925    789    ahrens 
   1926    789    ahrens 	gen_mask = -1ULL >> (64 - 8 * i);
   1927    789    ahrens 
   1928    789    ahrens 	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
   1929    789    ahrens 	if (err = zfs_zget(zfsvfs, object, &zp)) {
   1930    789    ahrens 		ZFS_EXIT(zfsvfs);
   1931    789    ahrens 		return (err);
   1932    789    ahrens 	}
   1933    789    ahrens 	zp_gen = zp->z_phys->zp_gen & gen_mask;
   1934    789    ahrens 	if (zp_gen == 0)
   1935    789    ahrens 		zp_gen = 1;
   1936   3461    ahrens 	if (zp->z_unlinked || zp_gen != fid_gen) {
   1937    789    ahrens 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
   1938    789    ahrens 		VN_RELE(ZTOV(zp));
   1939    789    ahrens 		ZFS_EXIT(zfsvfs);
   1940    789    ahrens 		return (EINVAL);
   1941    789    ahrens 	}
   1942    789    ahrens 
   1943    789    ahrens 	*vpp = ZTOV(zp);
   1944    789    ahrens 	ZFS_EXIT(zfsvfs);
   1945    789    ahrens 	return (0);
   1946    789    ahrens }
   1947    789    ahrens 
   1948   5326  ek110237 /*
   1949   5326  ek110237  * Block out VOPs and close zfsvfs_t::z_os
   1950   5326  ek110237  *
   1951   5326  ek110237  * Note, if successful, then we return with the 'z_teardown_lock' and
   1952   5326  ek110237  * 'z_teardown_inactive_lock' write held.
   1953   5326  ek110237  */
   1954   5326  ek110237 int
   1955  10298   Matthew zfs_suspend_fs(zfsvfs_t *zfsvfs)
   1956   5326  ek110237 {
   1957   5326  ek110237 	int error;
   1958   5326  ek110237 
   1959   5326  ek110237 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
   1960   5326  ek110237 		return (error);
   1961  10298   Matthew 	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
   1962   5326  ek110237 
   1963   5326  ek110237 	return (0);
   1964   5326  ek110237 }
   1965   5326  ek110237 
   1966   5326  ek110237 /*
   1967   5326  ek110237  * Reopen zfsvfs_t::z_os and release VOPs.
   1968   5326  ek110237  */
   1969   5326  ek110237 int
   1970  10298   Matthew zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
   1971   5326  ek110237 {
   1972   5326  ek110237 	int err;
   1973   5326  ek110237 
   1974   5326  ek110237 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
   1975   5326  ek110237 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
   1976   5326  ek110237 
   1977  10298   Matthew 	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
   1978  10298   Matthew 	    &zfsvfs->z_os);
   1979   5326  ek110237 	if (err) {
   1980   5326  ek110237 		zfsvfs->z_os = NULL;
   1981   5326  ek110237 	} else {
   1982   5326  ek110237 		znode_t *zp;
   1983   5326  ek110237 
   1984   5326  ek110237 		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
   1985   5326  ek110237 
   1986   5326  ek110237 		/*
   1987   5326  ek110237 		 * Attempt to re-establish all the active znodes with
   1988   5326  ek110237 		 * their dbufs.  If a zfs_rezget() fails, then we'll let
   1989   5326  ek110237 		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
   1990   5326  ek110237 		 * when they try to use their znode.
   1991   5326  ek110237 		 */
   1992   5326  ek110237 		mutex_enter(&zfsvfs->z_znodes_lock);
   1993   5326  ek110237 		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
   1994   5326  ek110237 		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
   1995   5326  ek110237 			(void) zfs_rezget(zp);
   1996   5326  ek110237 		}
   1997   5326  ek110237 		mutex_exit(&zfsvfs->z_znodes_lock);
   1998   5326  ek110237 
   1999   5326  ek110237 	}
   2000   5326  ek110237 
   2001   5326  ek110237 	/* release the VOPs */
   2002   5326  ek110237 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
   2003   5326  ek110237 	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
   2004   5326  ek110237 
   2005   5326  ek110237 	if (err) {
   2006   5326  ek110237 		/*
   2007   5326  ek110237 		 * Since we couldn't reopen zfsvfs::z_os, force
   2008   5326  ek110237 		 * unmount this file system.
   2009   5326  ek110237 		 */
   2010   5326  ek110237 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
   2011   5326  ek110237 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED());
   2012   5326  ek110237 	}
   2013   5326  ek110237 	return (err);
   2014   5326  ek110237 }
   2015   5326  ek110237 
   2016    789    ahrens static void
   2017    789    ahrens zfs_freevfs(vfs_t *vfsp)
   2018    789    ahrens {
   2019    789    ahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   2020   9214     chris 
   2021   9214     chris 	/*
   2022   9214     chris 	 * If this is a snapshot, we have an extra VFS_HOLD on our parent
   2023   9214     chris 	 * from zfs_mount().  Release it here.
   2024   9214     chris 	 */
   2025   9214     chris 	if (zfsvfs->z_issnap)
   2026   9214     chris 		VFS_RELE(zfsvfs->z_parent->z_vfs);
   2027   9214     chris 
   2028   9396   Matthew 	zfsvfs_free(zfsvfs);
   2029    789    ahrens 
   2030    789    ahrens 	atomic_add_32(&zfs_active_fs_count, -1);
   2031    789    ahrens }
   2032    789    ahrens 
   2033    789    ahrens /*
   2034    789    ahrens  * VFS_INIT() initialization.  Note that there is no VFS_FINI(),
   2035    789    ahrens  * so we can't safely do any non-idempotent initialization here.
   2036    789    ahrens  * Leave that to zfs_init() and zfs_fini(), which are called
   2037    789    ahrens  * from the module's _init() and _fini() entry points.
   2038    789    ahrens  */
   2039    789    ahrens /*ARGSUSED*/
   2040    789    ahrens static int
   2041    789    ahrens zfs_vfsinit(int fstype, char *name)
   2042    789    ahrens {
   2043    789    ahrens 	int error;
   2044    789    ahrens 
   2045    789    ahrens 	zfsfstype = fstype;
   2046    789    ahrens 
   2047    789    ahrens 	/*
   2048    789    ahrens 	 * Setup vfsops and vnodeops tables.
   2049    789    ahrens 	 */
   2050    789    ahrens 	error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
   2051    789    ahrens 	if (error != 0) {
   2052    789    ahrens 		cmn_err(CE_WARN, "zfs: bad vfs ops template");
   2053    789    ahrens 	}
   2054    789    ahrens 
   2055    789    ahrens 	error = zfs_create_op_tables();
   2056    789    ahrens 	if (error) {
   2057    789    ahrens 		zfs_remove_op_tables();
   2058    789    ahrens 		cmn_err(CE_WARN, "zfs: bad vnode ops template");
   2059    789    ahrens 		(void) vfs_freevfsops_by_type(zfsfstype);
   2060    789    ahrens 		return (error);
   2061    789    ahrens 	}
   2062    789    ahrens 
   2063    789    ahrens 	mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
   2064    789    ahrens 
   2065    789    ahrens 	/*
   2066    849   bonwick 	 * Unique major number for all zfs mounts.
   2067    849   bonwick 	 * If we run out of 32-bit minors, we'll getudev() another major.
   2068    789    ahrens 	 */
   2069    849   bonwick 	zfs_major = ddi_name_to_major(ZFS_DRIVER);
   2070    849   bonwick 	zfs_minor = ZFS_MIN_MINOR;
   2071    789    ahrens 
   2072    789    ahrens 	return (0);
   2073    789    ahrens }
   2074    789    ahrens 
   2075    789    ahrens void
   2076    789    ahrens zfs_init(void)
   2077    789    ahrens {
   2078    789    ahrens 	/*
   2079    789    ahrens 	 * Initialize .zfs directory structures
   2080    789    ahrens 	 */
   2081    789    ahrens 	zfsctl_init();
   2082    789    ahrens 
   2083    789    ahrens 	/*
   2084    789    ahrens 	 * Initialize znode cache, vnode ops, etc...
   2085    789    ahrens 	 */
   2086    789    ahrens 	zfs_znode_init();
   2087   9396   Matthew 
   2088   9396   Matthew 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
   2089    789    ahrens }
   2090    789    ahrens 
   2091    789    ahrens void
   2092    789    ahrens zfs_fini(void)
   2093    789    ahrens {
   2094    789    ahrens 	zfsctl_fini();
   2095    789    ahrens 	zfs_znode_fini();
   2096    789    ahrens }
   2097    789    ahrens 
   2098    789    ahrens int
   2099    789    ahrens zfs_busy(void)
   2100    789    ahrens {
   2101    789    ahrens 	return (zfs_active_fs_count != 0);
   2102    789    ahrens }
   2103    789    ahrens 
   2104   4577    ahrens int
   2105   9396   Matthew zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
   2106   4577    ahrens {
   2107   4577    ahrens 	int error;
   2108   9396   Matthew 	objset_t *os = zfsvfs->z_os;
   2109   4577    ahrens 	dmu_tx_t *tx;
   2110   4577    ahrens 
   2111   4577    ahrens 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
   2112   4577    ahrens 		return (EINVAL);
   2113   4577    ahrens 
   2114   9396   Matthew 	if (newvers < zfsvfs->z_version)
   2115   9396   Matthew 		return (EINVAL);
   2116   4577    ahrens 
   2117   4577    ahrens 	tx = dmu_tx_create(os);
   2118   9396   Matthew 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
   2119   4577    ahrens 	error = dmu_tx_assign(tx, TXG_WAIT);
   2120   4577    ahrens 	if (error) {
   2121   4577    ahrens 		dmu_tx_abort(tx);
   2122   9396   Matthew 		return (error);
   2123   4577    ahrens 	}
   2124   9396   Matthew 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
   2125   9396   Matthew 	    8, 1, &newvers, tx);
   2126   9396   Matthew 
   2127   9396   Matthew 	if (error) {
   2128   9396   Matthew 		dmu_tx_commit(tx);
   2129   9396   Matthew 		return (error);
   2130   9396   Matthew 	}
   2131   4577    ahrens 
   2132   4577    ahrens 	spa_history_internal_log(LOG_DS_UPGRADE,
   2133   4577    ahrens 	    dmu_objset_spa(os), tx, CRED(),
   2134   9396   Matthew 	    "oldver=%llu newver=%llu dataset = %llu",
   2135   9396   Matthew 	    zfsvfs->z_version, newvers, dmu_objset_id(os));
   2136   9396   Matthew 
   2137   4577    ahrens 	dmu_tx_commit(tx);
   2138   4577    ahrens 
   2139   9396   Matthew 	zfsvfs->z_version = newvers;
   2140   9396   Matthew 
   2141   9396   Matthew 	if (zfsvfs->z_version >= ZPL_VERSION_FUID)
   2142   9396   Matthew 		zfs_set_fuid_feature(zfsvfs);
   2143   9396   Matthew 
   2144   9396   Matthew 	return (0);
   2145   4577    ahrens }
   2146   4577    ahrens 
   2147   5498      timh /*
   2148   5498      timh  * Read a property stored within the master node.
   2149   5498      timh  */
   2150   5498      timh int
   2151   5498      timh zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
   2152   5498      timh {
   2153   5498      timh 	const char *pname;
   2154   7184      timh 	int error = ENOENT;
   2155   5498      timh 
   2156   5498      timh 	/*
   2157   5498      timh 	 * Look up the file system's value for the property.  For the
   2158   5498      timh 	 * version property, we look up a slightly different string.
   2159   5498      timh 	 */
   2160   5498      timh 	if (prop == ZFS_PROP_VERSION)
   2161   5498      timh 		pname = ZPL_VERSION_STR;
   2162   5498      timh 	else
   2163   5498      timh 		pname = zfs_prop_to_name(prop);
   2164   5498      timh 
   2165   7184      timh 	if (os != NULL)
   2166   7184      timh 		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
   2167   5498      timh 
   2168   6404    maybee 	if (error == ENOENT) {
   2169   5498      timh 		/* No value set, use the default value */
   2170   5498      timh 		switch (prop) {
   2171   6404    maybee 		case ZFS_PROP_VERSION:
   2172   6404    maybee 			*value = ZPL_VERSION;
   2173   6404    maybee 			break;
   2174   5498      timh 		case ZFS_PROP_NORMALIZE:
   2175   5498      timh 		case ZFS_PROP_UTF8ONLY:
   2176   5498      timh 			*value = 0;
   2177   5498      timh 			break;
   2178   5498      timh 		case ZFS_PROP_CASE:
   2179   5498      timh 			*value = ZFS_CASE_SENSITIVE;
   2180   5498      timh 			break;
   2181   5498      timh 		default:
   2182   6404    maybee 			return (error);
   2183   5498      timh 		}
   2184   6404    maybee 		error = 0;
   2185   5498      timh 	}
   2186   6404    maybee 	return (error);
   2187   5498      timh }
   2188   5498      timh 
   2189    789    ahrens static vfsdef_t vfw = {
   2190    789    ahrens 	VFSDEF_VERSION,
   2191    789    ahrens 	MNTTYPE_ZFS,
   2192    789    ahrens 	zfs_vfsinit,
   2193   5331       amw 	VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
   2194   5331       amw 	    VSW_XID,
   2195    789    ahrens 	&zfs_mntopts
   2196    789    ahrens };
   2197    789    ahrens 
   2198    789    ahrens struct modlfs zfs_modlfs = {
   2199   4577    ahrens 	&mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
   2200    789    ahrens };
   2201