Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * ZFS control directory (a.k.a. ".zfs")
     28  *
     29  * This directory provides a common location for all ZFS meta-objects.
     30  * Currently, this is only the 'snapshot' directory, but this may expand in the
     31  * future.  The elements are built using the GFS primitives, as the hierarchy
     32  * does not actually exist on disk.
     33  *
     34  * For 'snapshot', we don't want to have all snapshots always mounted, because
     35  * this would take up a huge amount of space in /etc/mnttab.  We have three
     36  * types of objects:
     37  *
     38  * 	ctldir ------> snapshotdir -------> snapshot
     39  *                                             |
     40  *                                             |
     41  *                                             V
     42  *                                         mounted fs
     43  *
     44  * The 'snapshot' node contains just enough information to lookup '..' and act
     45  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
     46  * perform an automount of the underlying filesystem and return the
     47  * corresponding vnode.
     48  *
     49  * All mounts are handled automatically by the kernel, but unmounts are
     50  * (currently) handled from user land.  The main reason is that there is no
     51  * reliable way to auto-unmount the filesystem when it's "no longer in use".
     52  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
     53  * unmounts any snapshots within the snapshot directory.
     54  *
     55  * The '.zfs', '.zfs/snapshot', and all directories created under
     56  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
     57  * share the same vfs_t as the head filesystem (what '.zfs' lives under).
     58  *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
     60  * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
     61  * However, vnodes within these mounted on file systems have their v_vfsp
     62  * fields set to the head filesystem to make NFS happy (see
     63  * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
     64  * so that it cannot be freed until all snapshots have been unmounted.
     65  */
     66 
     67 #include <fs/fs_subr.h>
     68 #include <sys/zfs_ctldir.h>
     69 #include <sys/zfs_ioctl.h>
     70 #include <sys/zfs_vfsops.h>
     71 #include <sys/vfs_opreg.h>
     72 #include <sys/gfs.h>
     73 #include <sys/stat.h>
     74 #include <sys/dmu.h>
     75 #include <sys/dsl_deleg.h>
     76 #include <sys/mount.h>
     77 #include <sys/sunddi.h>
     78 
     79 #include "zfs_namecheck.h"
     80 
/*
 * Private data attached to the GFS vnodes under '.zfs'.  zfsctl_create()
 * allocates one of these (via gfs_root_create()) for the '.zfs' node itself.
 */
typedef struct zfsctl_node {
	gfs_dir_t	zc_gfs_private;	/* GFS directory data; first member */
	uint64_t	zc_id;		/* node id, encoded by zfsctl_common_fid() */
	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
} zfsctl_node_t;
     86 
/*
 * Private data for the '.zfs/snapshot' directory vnode.
 */
typedef struct zfsctl_snapdir {
	zfsctl_node_t	sd_node;	/* common '.zfs' node data */
	kmutex_t	sd_lock;	/* held while sd_snaps is searched/changed */
	avl_tree_t	sd_snaps;	/* zfs_snapentry_t's, looked up by se_name */
} zfsctl_snapdir_t;
     92 
/*
 * One entry in a snapdir's sd_snaps tree, describing a snapshot for which
 * a mountpoint vnode has been created.
 */
typedef struct {
	char		*se_name;	/* kmem-allocated name, strlen() + 1 bytes */
	vnode_t		*se_root;	/* GFS vnode the snapshot is mounted on */
	avl_node_t	se_node;	/* linkage in zfsctl_snapdir_t.sd_snaps */
} zfs_snapentry_t;
     98 
     99 static int
    100 snapentry_compare(const void *a, const void *b)
    101 {
    102 	const zfs_snapentry_t *sa = a;
    103 	const zfs_snapentry_t *sb = b;
    104 	int ret = strcmp(sa->se_name, sb->se_name);
    105 
    106 	if (ret < 0)
    107 		return (-1);
    108 	else if (ret > 0)
    109 		return (1);
    110 	else
    111 		return (0);
    112 }
    113 
/*
 * Vnode operation vectors for the '.zfs' hierarchy.  They are referenced
 * from zfsctl_opsvec[] and released and reset to NULL by zfsctl_fini().
 */
vnodeops_t *zfsctl_ops_root;
vnodeops_t *zfsctl_ops_snapdir;
vnodeops_t *zfsctl_ops_snapshot;
vnodeops_t *zfsctl_ops_shares;
vnodeops_t *zfsctl_ops_shares_dir;

/* Operation templates; the tables themselves are defined further down. */
static const fs_operation_def_t zfsctl_tops_root[];
static const fs_operation_def_t zfsctl_tops_snapdir[];
static const fs_operation_def_t zfsctl_tops_snapshot[];
static const fs_operation_def_t zfsctl_tops_shares[];

/* Forward declarations for helpers used before their definitions. */
static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
static vnode_t *zfsctl_mknode_shares(vnode_t *);
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
    129 
/*
 * Table passed to gfs_make_opsvec() by zfsctl_init().  Each row names a
 * node type, its operation template, and the vnodeops_t pointer associated
 * with it.  Note that the two shares rows use the same template.
 */
static gfs_opsvec_t zfsctl_opsvec[] = {
	{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
	{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
	{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
	{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
	{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
	{ NULL }
};
    138 
/*
 * Root directory elements.  We only have two entries: 'snapshot' and
 * 'shares'.  Each row gives the entry name, the callback that creates its
 * vnode, and GFS flags; the list is terminated by a NULL row.
 */
static gfs_dirent_t zfsctl_root_entries[] = {
	{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
	{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
	{ NULL }
};
    148 
/*
 * Number of entries in '.zfs', including '.' and '..'.  The array element
 * count already includes the NULL terminator row, so adding one more
 * accounts for both '.' and '..'.
 */
#define	NROOT_ENTRIES	((sizeof (zfsctl_root_entries) / \
    sizeof (gfs_dirent_t)) + 1)
    152 
    153 
/*
 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 * directories.  This is called from the ZFS init routine, and initializes the
 * vnode ops vectors that we'll be using.
 *
 * A failure of gfs_make_opsvec() is treated as fatal (VERIFY).
 */
void
zfsctl_init(void)
{
	VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
}
    164 
    165 void
    166 zfsctl_fini(void)
    167 {
    168 	/*
    169 	 * Remove vfsctl vnode ops
    170 	 */
    171 	if (zfsctl_ops_root)
    172 		vn_freevnodeops(zfsctl_ops_root);
    173 	if (zfsctl_ops_snapdir)
    174 		vn_freevnodeops(zfsctl_ops_snapdir);
    175 	if (zfsctl_ops_snapshot)
    176 		vn_freevnodeops(zfsctl_ops_snapshot);
    177 	if (zfsctl_ops_shares)
    178 		vn_freevnodeops(zfsctl_ops_shares);
    179 	if (zfsctl_ops_shares_dir)
    180 		vn_freevnodeops(zfsctl_ops_shares_dir);
    181 
    182 	zfsctl_ops_root = NULL;
    183 	zfsctl_ops_snapdir = NULL;
    184 	zfsctl_ops_snapshot = NULL;
    185 	zfsctl_ops_shares = NULL;
    186 	zfsctl_ops_shares_dir = NULL;
    187 }
    188 
    189 /*
    190  * Return the inode number associated with the 'snapshot' or
    191  * 'shares' directory.
    192  */
    193 /* ARGSUSED */
    194 static ino64_t
    195 zfsctl_root_inode_cb(vnode_t *vp, int index)
    196 {
    197 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    198 
    199 	ASSERT(index <= 2);
    200 
    201 	if (index == 0)
    202 		return (ZFSCTL_INO_SNAPDIR);
    203 
    204 	return (zfsvfs->z_shares_dir);
    205 }
    206 
    207 /*
    208  * Create the '.zfs' directory.  This directory is cached as part of the VFS
    209  * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
    210  * therefore checks against a vfs_count of 2 instead of 1.  This reference
    211  * is removed when the ctldir is destroyed in the unmount.
    212  */
    213 void
    214 zfsctl_create(zfsvfs_t *zfsvfs)
    215 {
    216 	vnode_t *vp, *rvp;
    217 	zfsctl_node_t *zcp;
    218 
    219 	ASSERT(zfsvfs->z_ctldir == NULL);
    220 
    221 	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
    222 	    zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
    223 	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
    224 	zcp = vp->v_data;
    225 	zcp->zc_id = ZFSCTL_INO_ROOT;
    226 
    227 	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
    228 	ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
    229 	VN_RELE(rvp);
    230 
    231 	/*
    232 	 * We're only faking the fact that we have a root of a filesystem for
    233 	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
    234 	 * for us.
    235 	 */
    236 	vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
    237 
    238 	zfsvfs->z_ctldir = vp;
    239 }
    240 
    241 /*
    242  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
    243  * There might still be more references if we were force unmounted, but only
    244  * new zfs_inactive() calls can occur and they don't reference .zfs
    245  */
    246 void
    247 zfsctl_destroy(zfsvfs_t *zfsvfs)
    248 {
    249 	VN_RELE(zfsvfs->z_ctldir);
    250 	zfsvfs->z_ctldir = NULL;
    251 }
    252 
    253 /*
    254  * Given a root znode, retrieve the associated .zfs directory.
    255  * Add a hold to the vnode and return it.
    256  */
    257 vnode_t *
    258 zfsctl_root(znode_t *zp)
    259 {
    260 	ASSERT(zfs_has_ctldir(zp));
    261 	VN_HOLD(zp->z_zfsvfs->z_ctldir);
    262 	return (zp->z_zfsvfs->z_ctldir);
    263 }
    264 
    265 /*
    266  * Common open routine.  Disallow any write access.
    267  */
    268 /* ARGSUSED */
    269 static int
    270 zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
    271 {
    272 	if (flags & FWRITE)
    273 		return (EACCES);
    274 
    275 	return (0);
    276 }
    277 
/*
 * Common close routine.  There is nothing to release, so this always
 * returns 0; all arguments are ignored.
 */
/* ARGSUSED */
static int
zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    cred_t *cr, caller_context_t *ct)
{
	return (0);
}
    288 
    289 /*
    290  * Common access routine.  Disallow writes.
    291  */
    292 /* ARGSUSED */
    293 static int
    294 zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    295     caller_context_t *ct)
    296 {
    297 	if (flags & V_ACE_MASK) {
    298 		if (mode & ACE_ALL_WRITE_PERMS)
    299 			return (EACCES);
    300 	} else {
    301 		if (mode & VWRITE)
    302 			return (EACCES);
    303 	}
    304 
    305 	return (0);
    306 }
    307 
    308 /*
    309  * Common getattr function.  Fill in basic information.
    310  */
    311 static void
    312 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
    313 {
    314 	timestruc_t	now;
    315 
    316 	vap->va_uid = 0;
    317 	vap->va_gid = 0;
    318 	vap->va_rdev = 0;
    319 	/*
    320 	 * We are a purely virtual object, so we have no
    321 	 * blocksize or allocated blocks.
    322 	 */
    323 	vap->va_blksize = 0;
    324 	vap->va_nblocks = 0;
    325 	vap->va_seq = 0;
    326 	vap->va_fsid = vp->v_vfsp->vfs_dev;
    327 	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
    328 	    S_IROTH | S_IXOTH;
    329 	vap->va_type = VDIR;
    330 	/*
    331 	 * We live in the now (for atime).
    332 	 */
    333 	gethrestime(&now);
    334 	vap->va_atime = now;
    335 }
    336 
    337 /*ARGSUSED*/
    338 static int
    339 zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
    340 {
    341 	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
    342 	zfsctl_node_t	*zcp = vp->v_data;
    343 	uint64_t	object = zcp->zc_id;
    344 	zfid_short_t	*zfid;
    345 	int		i;
    346 
    347 	ZFS_ENTER(zfsvfs);
    348 
    349 	if (fidp->fid_len < SHORT_FID_LEN) {
    350 		fidp->fid_len = SHORT_FID_LEN;
    351 		ZFS_EXIT(zfsvfs);
    352 		return (ENOSPC);
    353 	}
    354 
    355 	zfid = (zfid_short_t *)fidp;
    356 
    357 	zfid->zf_len = SHORT_FID_LEN;
    358 
    359 	for (i = 0; i < sizeof (zfid->zf_object); i++)
    360 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
    361 
    362 	/* .zfs znodes always have a generation number of 0 */
    363 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
    364 		zfid->zf_gen[i] = 0;
    365 
    366 	ZFS_EXIT(zfsvfs);
    367 	return (0);
    368 }
    369 
    370 
    371 /*ARGSUSED*/
    372 static int
    373 zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
    374 {
    375 	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
    376 	znode_t		*dzp;
    377 	int		error;
    378 
    379 	ZFS_ENTER(zfsvfs);
    380 
    381 	if (zfsvfs->z_shares_dir == 0) {
    382 		ZFS_EXIT(zfsvfs);
    383 		return (ENOTSUP);
    384 	}
    385 
    386 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
    387 		error = VOP_FID(ZTOV(dzp), fidp, ct);
    388 		VN_RELE(ZTOV(dzp));
    389 	}
    390 
    391 	ZFS_EXIT(zfsvfs);
    392 	return (error);
    393 }
/*
 * .zfs inode namespace
 *
 * We need to generate unique inode numbers for all files and directories
 * within the .zfs pseudo-filesystem.  We use the following scheme:
 *
 * 	ENTRY			ZFSCTL_INODE
 * 	.zfs			1
 * 	.zfs/snapshot		2
 * 	.zfs/shares		zfsvfs->z_shares_dir (see zfsctl_root_inode_cb())
 * 	.zfs/snapshot/<snap>	objectid(snap)
 */

/* Inode number of '.zfs/snapshot/<snap>': the snapshot's object id as is. */
#define	ZFSCTL_INO_SNAP(id)	(id)
    407 
    408 /*
    409  * Get root directory attributes.
    410  */
    411 /* ARGSUSED */
    412 static int
    413 zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    414     caller_context_t *ct)
    415 {
    416 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    417 	zfsctl_node_t *zcp = vp->v_data;
    418 
    419 	ZFS_ENTER(zfsvfs);
    420 	vap->va_nodeid = ZFSCTL_INO_ROOT;
    421 	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
    422 	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
    423 
    424 	zfsctl_common_getattr(vp, vap);
    425 	ZFS_EXIT(zfsvfs);
    426 
    427 	return (0);
    428 }
    429 
    430 /*
    431  * Special case the handling of "..".
    432  */
    433 /* ARGSUSED */
    434 int
    435 zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    436     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    437     int *direntflags, pathname_t *realpnp)
    438 {
    439 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    440 	int err;
    441 
    442 	/*
    443 	 * No extended attributes allowed under .zfs
    444 	 */
    445 	if (flags & LOOKUP_XATTR)
    446 		return (EINVAL);
    447 
    448 	ZFS_ENTER(zfsvfs);
    449 
    450 	if (strcmp(nm, "..") == 0) {
    451 		err = VFS_ROOT(dvp->v_vfsp, vpp);
    452 	} else {
    453 		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
    454 		    cr, ct, direntflags, realpnp);
    455 	}
    456 
    457 	ZFS_EXIT(zfsvfs);
    458 
    459 	return (err);
    460 }
    461 
    462 static int
    463 zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    464     caller_context_t *ct)
    465 {
    466 	/*
    467 	 * We only care about ACL_ENABLED so that libsec can
    468 	 * display ACL correctly and not default to POSIX draft.
    469 	 */
    470 	if (cmd == _PC_ACL_ENABLED) {
    471 		*valp = _ACL_ACE_ENABLED;
    472 		return (0);
    473 	}
    474 
    475 	return (fs_pathconf(vp, cmd, valp, cr, ct));
    476 }
    477 
/*
 * Vnode operation template for the '.zfs' directory (the ".zfs" row of
 * zfsctl_opsvec[]).  ioctl is wired to fs_inval; readdir and inactive are
 * the generic GFS implementations.
 */
static const fs_operation_def_t zfsctl_tops_root[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } 	},
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup }	},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf }	},
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid	}	},
	{ NULL }
};
    492 
    493 static int
    494 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
    495 {
    496 	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
    497 
    498 	if (snapshot_namecheck(name, NULL, NULL) != 0)
    499 		return (EILSEQ);
    500 	dmu_objset_name(os, zname);
    501 	if (strlen(zname) + 1 + strlen(name) >= len)
    502 		return (ENAMETOOLONG);
    503 	(void) strcat(zname, "@");
    504 	(void) strcat(zname, name);
    505 	return (0);
    506 }
    507 
/*
 * Unmount the snapshot filesystem mounted on sep->se_root and, on success,
 * dispose of the mountpoint vnode and free the entry and its name.
 *
 * The caller is expected to have already removed 'sep' from the snapdir's
 * AVL tree (zfsctl_snapdir_remove() re-adds it when this fails).  On failure
 * the entry is left intact and the error from vn_vfswlock() or dounmount()
 * is returned.
 */
static int
zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
{
	vnode_t *svp = sep->se_root;
	int error;

	ASSERT(vn_ismntpt(svp));

	/* this will be dropped by dounmount() */
	if ((error = vn_vfswlock(svp)) != 0)
		return (error);

	/* Hold the mountpoint across the unmount; undone on failure. */
	VN_HOLD(svp);
	error = dounmount(vn_mountedvfs(svp), fflags, cr);
	if (error) {
		VN_RELE(svp);
		return (error);
	}

	/*
	 * We can't use VN_RELE(), as that will try to invoke
	 * zfsctl_snapdir_inactive(), which would cause us to destroy
	 * the sd_lock mutex held by our caller.
	 */
	ASSERT(svp->v_count == 1);
	gfs_vop_inactive(svp, cr, NULL);

	/* se_name was allocated with exactly strlen() + 1 bytes. */
	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	kmem_free(sep, sizeof (zfs_snapentry_t));

	return (0);
}
    540 
    541 static void
    542 zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
    543 {
    544 	avl_index_t where;
    545 	vfs_t *vfsp;
    546 	refstr_t *pathref;
    547 	char newpath[MAXNAMELEN];
    548 	char *tail;
    549 
    550 	ASSERT(MUTEX_HELD(&sdp->sd_lock));
    551 	ASSERT(sep != NULL);
    552 
    553 	vfsp = vn_mountedvfs(sep->se_root);
    554 	ASSERT(vfsp != NULL);
    555 
    556 	vfs_lock_wait(vfsp);
    557 
    558 	/*
    559 	 * Change the name in the AVL tree.
    560 	 */
    561 	avl_remove(&sdp->sd_snaps, sep);
    562 	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
    563 	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
    564 	(void) strcpy(sep->se_name, nm);
    565 	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
    566 	avl_insert(&sdp->sd_snaps, sep, where);
    567 
    568 	/*
    569 	 * Change the current mountpoint info:
    570 	 * 	- update the tail of the mntpoint path
    571 	 *	- update the tail of the resource path
    572 	 */
    573 	pathref = vfs_getmntpoint(vfsp);
    574 	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
    575 	VERIFY((tail = strrchr(newpath, '/')) != NULL);
    576 	*(tail+1) = '\0';
    577 	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
    578 	(void) strcat(newpath, nm);
    579 	refstr_rele(pathref);
    580 	vfs_setmntpoint(vfsp, newpath);
    581 
    582 	pathref = vfs_getresource(vfsp);
    583 	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
    584 	VERIFY((tail = strrchr(newpath, '@')) != NULL);
    585 	*(tail+1) = '\0';
    586 	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
    587 	(void) strcat(newpath, nm);
    588 	refstr_rele(pathref);
    589 	vfs_setresource(vfsp, newpath);
    590 
    591 	vfs_unlock(vfsp);
    592 }
    593 
    594 /*ARGSUSED*/
    595 static int
    596 zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    597     cred_t *cr, caller_context_t *ct, int flags)
    598 {
    599 	zfsctl_snapdir_t *sdp = sdvp->v_data;
    600 	zfs_snapentry_t search, *sep;
    601 	zfsvfs_t *zfsvfs;
    602 	avl_index_t where;
    603 	char from[MAXNAMELEN], to[MAXNAMELEN];
    604 	char real[MAXNAMELEN];
    605 	int err;
    606 
    607 	zfsvfs = sdvp->v_vfsp->vfs_data;
    608 	ZFS_ENTER(zfsvfs);
    609 
    610 	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
    611 		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
    612 		    MAXNAMELEN, NULL);
    613 		if (err == 0) {
    614 			snm = real;
    615 		} else if (err != ENOTSUP) {
    616 			ZFS_EXIT(zfsvfs);
    617 			return (err);
    618 		}
    619 	}
    620 
    621 	ZFS_EXIT(zfsvfs);
    622 
    623 	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
    624 	if (!err)
    625 		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
    626 	if (!err)
    627 		err = zfs_secpolicy_rename_perms(from, to, cr);
    628 	if (err)
    629 		return (err);
    630 
    631 	/*
    632 	 * Cannot move snapshots out of the snapdir.
    633 	 */
    634 	if (sdvp != tdvp)
    635 		return (EINVAL);
    636 
    637 	if (strcmp(snm, tnm) == 0)
    638 		return (0);
    639 
    640 	mutex_enter(&sdp->sd_lock);
    641 
    642 	search.se_name = (char *)snm;
    643 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
    644 		mutex_exit(&sdp->sd_lock);
    645 		return (ENOENT);
    646 	}
    647 
    648 	err = dmu_objset_rename(from, to, B_FALSE);
    649 	if (err == 0)
    650 		zfsctl_rename_snap(sdp, sep, tnm);
    651 
    652 	mutex_exit(&sdp->sd_lock);
    653 
    654 	return (err);
    655 }
    656 
    657 /* ARGSUSED */
    658 static int
    659 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    660     caller_context_t *ct, int flags)
    661 {
    662 	zfsctl_snapdir_t *sdp = dvp->v_data;
    663 	zfs_snapentry_t *sep;
    664 	zfs_snapentry_t search;
    665 	zfsvfs_t *zfsvfs;
    666 	char snapname[MAXNAMELEN];
    667 	char real[MAXNAMELEN];
    668 	int err;
    669 
    670 	zfsvfs = dvp->v_vfsp->vfs_data;
    671 	ZFS_ENTER(zfsvfs);
    672 
    673 	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
    674 
    675 		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
    676 		    MAXNAMELEN, NULL);
    677 		if (err == 0) {
    678 			name = real;
    679 		} else if (err != ENOTSUP) {
    680 			ZFS_EXIT(zfsvfs);
    681 			return (err);
    682 		}
    683 	}
    684 
    685 	ZFS_EXIT(zfsvfs);
    686 
    687 	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
    688 	if (!err)
    689 		err = zfs_secpolicy_destroy_perms(snapname, cr);
    690 	if (err)
    691 		return (err);
    692 
    693 	mutex_enter(&sdp->sd_lock);
    694 
    695 	search.se_name = name;
    696 	sep = avl_find(&sdp->sd_snaps, &search, NULL);
    697 	if (sep) {
    698 		avl_remove(&sdp->sd_snaps, sep);
    699 		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
    700 		if (err)
    701 			avl_add(&sdp->sd_snaps, sep);
    702 		else
    703 			err = dmu_objset_destroy(snapname, B_FALSE);
    704 	} else {
    705 		err = ENOENT;
    706 	}
    707 
    708 	mutex_exit(&sdp->sd_lock);
    709 
    710 	return (err);
    711 }
    712 
    713 /*
    714  * This creates a snapshot under '.zfs/snapshot'.
    715  */
    716 /* ARGSUSED */
    717 static int
    718 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,
    719     cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
    720 {
    721 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    722 	char name[MAXNAMELEN];
    723 	int err;
    724 	static enum symfollow follow = NO_FOLLOW;
    725 	static enum uio_seg seg = UIO_SYSSPACE;
    726 
    727 	if (snapshot_namecheck(dirname, NULL, NULL) != 0)
    728 		return (EILSEQ);
    729 
    730 	dmu_objset_name(zfsvfs->z_os, name);
    731 
    732 	*vpp = NULL;
    733 
    734 	err = zfs_secpolicy_snapshot_perms(name, cr);
    735 	if (err)
    736 		return (err);
    737 
    738 	if (err == 0) {
    739 		err = dmu_objset_snapshot(name, dirname, NULL, B_FALSE);
    740 		if (err)
    741 			return (err);
    742 		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
    743 	}
    744 
    745 	return (err);
    746 }
    747 
    748 /*
    749  * Lookup entry point for the 'snapshot' directory.  Try to open the
    750  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
    751  * Perform a mount of the associated dataset on top of the vnode.
    752  */
    753 /* ARGSUSED */
    754 static int
    755 zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    756     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    757     int *direntflags, pathname_t *realpnp)
    758 {
    759 	zfsctl_snapdir_t *sdp = dvp->v_data;
    760 	objset_t *snap;
    761 	char snapname[MAXNAMELEN];
    762 	char real[MAXNAMELEN];
    763 	char *mountpoint;
    764 	zfs_snapentry_t *sep, search;
    765 	struct mounta margs;
    766 	vfs_t *vfsp;
    767 	size_t mountpoint_len;
    768 	avl_index_t where;
    769 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    770 	int err;
    771 
    772 	/*
    773 	 * No extended attributes allowed under .zfs
    774 	 */
    775 	if (flags & LOOKUP_XATTR)
    776 		return (EINVAL);
    777 
    778 	ASSERT(dvp->v_type == VDIR);
    779 
    780 	/*
    781 	 * If we get a recursive call, that means we got called
    782 	 * from the domount() code while it was trying to look up the
    783 	 * spec (which looks like a local path for zfs).  We need to
    784 	 * add some flag to domount() to tell it not to do this lookup.
    785 	 */
    786 	if (MUTEX_HELD(&sdp->sd_lock))
    787 		return (ENOENT);
    788 
    789 	ZFS_ENTER(zfsvfs);
    790 
    791 	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
    792 		ZFS_EXIT(zfsvfs);
    793 		return (0);
    794 	}
    795 
    796 	if (flags & FIGNORECASE) {
    797 		boolean_t conflict = B_FALSE;
    798 
    799 		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
    800 		    MAXNAMELEN, &conflict);
    801 		if (err == 0) {
    802 			nm = real;
    803 		} else if (err != ENOTSUP) {
    804 			ZFS_EXIT(zfsvfs);
    805 			return (err);
    806 		}
    807 		if (realpnp)
    808 			(void) strlcpy(realpnp->pn_buf, nm,
    809 			    realpnp->pn_bufsize);
    810 		if (conflict && direntflags)
    811 			*direntflags = ED_CASE_CONFLICT;
    812 	}
    813 
    814 	mutex_enter(&sdp->sd_lock);
    815 	search.se_name = (char *)nm;
    816 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
    817 		*vpp = sep->se_root;
    818 		VN_HOLD(*vpp);
    819 		err = traverse(vpp);
    820 		if (err) {
    821 			VN_RELE(*vpp);
    822 			*vpp = NULL;
    823 		} else if (*vpp == sep->se_root) {
    824 			/*
    825 			 * The snapshot was unmounted behind our backs,
    826 			 * try to remount it.
    827 			 */
    828 			goto domount;
    829 		} else {
    830 			/*
    831 			 * VROOT was set during the traverse call.  We need
    832 			 * to clear it since we're pretending to be part
    833 			 * of our parent's vfs.
    834 			 */
    835 			(*vpp)->v_flag &= ~VROOT;
    836 		}
    837 		mutex_exit(&sdp->sd_lock);
    838 		ZFS_EXIT(zfsvfs);
    839 		return (err);
    840 	}
    841 
    842 	/*
    843 	 * The requested snapshot is not currently mounted, look it up.
    844 	 */
    845 	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
    846 	if (err) {
    847 		mutex_exit(&sdp->sd_lock);
    848 		ZFS_EXIT(zfsvfs);
    849 		/*
    850 		 * handle "ls *" or "?" in a graceful manner,
    851 		 * forcing EILSEQ to ENOENT.
    852 		 * Since shell ultimately passes "*" or "?" as name to lookup
    853 		 */
    854 		return (err == EILSEQ ? ENOENT : err);
    855 	}
    856 	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
    857 		mutex_exit(&sdp->sd_lock);
    858 		ZFS_EXIT(zfsvfs);
    859 		return (ENOENT);
    860 	}
    861 
    862 	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
    863 	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
    864 	(void) strcpy(sep->se_name, nm);
    865 	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
    866 	avl_insert(&sdp->sd_snaps, sep, where);
    867 
    868 	dmu_objset_rele(snap, FTAG);
    869 domount:
    870 	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
    871 	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
    872 	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
    873 	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
    874 	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
    875 
    876 	margs.spec = snapname;
    877 	margs.dir = mountpoint;
    878 	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
    879 	margs.fstype = "zfs";
    880 	margs.dataptr = NULL;
    881 	margs.datalen = 0;
    882 	margs.optptr = NULL;
    883 	margs.optlen = 0;
    884 
    885 	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
    886 	kmem_free(mountpoint, mountpoint_len);
    887 
    888 	if (err == 0) {
    889 		/*
    890 		 * Return the mounted root rather than the covered mount point.
    891 		 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
    892 		 * the ZFS vnode mounted on top of the GFS node.  This ZFS
    893 		 * vnode is the root of the newly created vfsp.
    894 		 */
    895 		VFS_RELE(vfsp);
    896 		err = traverse(vpp);
    897 	}
    898 
    899 	if (err == 0) {
    900 		/*
    901 		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
    902 		 *
    903 		 * This is where we lie about our v_vfsp in order to
    904 		 * make .zfs/snapshot/<snapname> accessible over NFS
    905 		 * without requiring manual mounts of <snapname>.
    906 		 */
    907 		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
    908 		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
    909 		(*vpp)->v_vfsp = zfsvfs->z_vfs;
    910 		(*vpp)->v_flag &= ~VROOT;
    911 	}
    912 	mutex_exit(&sdp->sd_lock);
    913 	ZFS_EXIT(zfsvfs);
    914 
    915 	/*
    916 	 * If we had an error, drop our hold on the vnode and
    917 	 * zfsctl_snapshot_inactive() will clean up.
    918 	 */
    919 	if (err) {
    920 		VN_RELE(*vpp);
    921 		*vpp = NULL;
    922 	}
    923 	return (err);
    924 }
    925 
    926 /* ARGSUSED */
    927 static int
    928 zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    929     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    930     int *direntflags, pathname_t *realpnp)
    931 {
    932 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    933 	znode_t *dzp;
    934 	int error;
    935 
    936 	ZFS_ENTER(zfsvfs);
    937 
    938 	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
    939 		ZFS_EXIT(zfsvfs);
    940 		return (0);
    941 	}
    942 
    943 	if (zfsvfs->z_shares_dir == 0) {
    944 		ZFS_EXIT(zfsvfs);
    945 		return (ENOTSUP);
    946 	}
    947 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
    948 		error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
    949 		    flags, rdir, cr, ct, direntflags, realpnp);
    950 
    951 	VN_RELE(ZTOV(dzp));
    952 	ZFS_EXIT(zfsvfs);
    953 
    954 	return (error);
    955 }
    956 
    957 /* ARGSUSED */
    958 static int
    959 zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    960     offset_t *offp, offset_t *nextp, void *data, int flags)
    961 {
    962 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    963 	char snapname[MAXNAMELEN];
    964 	uint64_t id, cookie;
    965 	boolean_t case_conflict;
    966 	int error;
    967 
    968 	ZFS_ENTER(zfsvfs);
    969 
    970 	cookie = *offp;
    971 	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
    972 	    &cookie, &case_conflict);
    973 	if (error) {
    974 		ZFS_EXIT(zfsvfs);
    975 		if (error == ENOENT) {
    976 			*eofp = 1;
    977 			return (0);
    978 		}
    979 		return (error);
    980 	}
    981 
    982 	if (flags & V_RDDIR_ENTFLAGS) {
    983 		edirent_t *eodp = dp;
    984 
    985 		(void) strcpy(eodp->ed_name, snapname);
    986 		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
    987 		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
    988 	} else {
    989 		struct dirent64 *odp = dp;
    990 
    991 		(void) strcpy(odp->d_name, snapname);
    992 		odp->d_ino = ZFSCTL_INO_SNAP(id);
    993 	}
    994 	*nextp = cookie;
    995 
    996 	ZFS_EXIT(zfsvfs);
    997 
    998 	return (0);
    999 }
   1000 
   1001 /* ARGSUSED */
   1002 static int
   1003 zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
   1004     caller_context_t *ct, int flags)
   1005 {
   1006 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1007 	znode_t *dzp;
   1008 	int error;
   1009 
   1010 	ZFS_ENTER(zfsvfs);
   1011 
   1012 	if (zfsvfs->z_shares_dir == 0) {
   1013 		ZFS_EXIT(zfsvfs);
   1014 		return (ENOTSUP);
   1015 	}
   1016 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
   1017 		error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
   1018 		VN_RELE(ZTOV(dzp));
   1019 	} else {
   1020 		*eofp = 1;
   1021 		error = ENOENT;
   1022 	}
   1023 
   1024 	ZFS_EXIT(zfsvfs);
   1025 	return (error);
   1026 }
   1027 
   1028 /*
   1029  * pvp is the '.zfs' directory (zfsctl_node_t).
   1030  * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
   1031  *
   1032  * This function is the callback to create a GFS vnode for '.zfs/snapshot'
   1033  * when a lookup is performed on .zfs for "snapshot".
   1034  */
   1035 vnode_t *
   1036 zfsctl_mknode_snapdir(vnode_t *pvp)
   1037 {
   1038 	vnode_t *vp;
   1039 	zfsctl_snapdir_t *sdp;
   1040 
   1041 	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
   1042 	    zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
   1043 	    zfsctl_snapdir_readdir_cb, NULL);
   1044 	sdp = vp->v_data;
   1045 	sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
   1046 	sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
   1047 	mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
   1048 	avl_create(&sdp->sd_snaps, snapentry_compare,
   1049 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
   1050 	return (vp);
   1051 }
   1052 
   1053 vnode_t *
   1054 zfsctl_mknode_shares(vnode_t *pvp)
   1055 {
   1056 	vnode_t *vp;
   1057 	zfsctl_node_t *sdp;
   1058 
   1059 	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
   1060 	    zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
   1061 	    NULL, NULL);
   1062 	sdp = vp->v_data;
   1063 	sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
   1064 	return (vp);
   1065 
   1066 }
   1067 
   1068 /* ARGSUSED */
   1069 static int
   1070 zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   1071     caller_context_t *ct)
   1072 {
   1073 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1074 	znode_t *dzp;
   1075 	int error;
   1076 
   1077 	ZFS_ENTER(zfsvfs);
   1078 	if (zfsvfs->z_shares_dir == 0) {
   1079 		ZFS_EXIT(zfsvfs);
   1080 		return (ENOTSUP);
   1081 	}
   1082 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
   1083 		error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
   1084 		VN_RELE(ZTOV(dzp));
   1085 	}
   1086 	ZFS_EXIT(zfsvfs);
   1087 	return (error);
   1088 
   1089 
   1090 }
   1091 
   1092 /* ARGSUSED */
   1093 static int
   1094 zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   1095     caller_context_t *ct)
   1096 {
   1097 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1098 	zfsctl_snapdir_t *sdp = vp->v_data;
   1099 
   1100 	ZFS_ENTER(zfsvfs);
   1101 	zfsctl_common_getattr(vp, vap);
   1102 	vap->va_nodeid = gfs_file_inode(vp);
   1103 	vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
   1104 	vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
   1105 	ZFS_EXIT(zfsvfs);
   1106 
   1107 	return (0);
   1108 }
   1109 
   1110 /* ARGSUSED */
   1111 static void
   1112 zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   1113 {
   1114 	zfsctl_snapdir_t *sdp = vp->v_data;
   1115 	void *private;
   1116 
   1117 	private = gfs_dir_inactive(vp);
   1118 	if (private != NULL) {
   1119 		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
   1120 		mutex_destroy(&sdp->sd_lock);
   1121 		avl_destroy(&sdp->sd_snaps);
   1122 		kmem_free(private, sizeof (zfsctl_snapdir_t));
   1123 	}
   1124 }
   1125 
/*
 * Vnode operation table for the snapshot directory node type.
 * Directory-modifying operations (rename, rmdir, mkdir) and lookup map
 * to the zfsctl_snapdir_*() handlers; readdir goes through the generic
 * gfs_vop_readdir(); ioctl is rejected with fs_inval.  The list is
 * terminated by a { NULL } entry.
 *
 * NOTE(review): presumably this template is what zfsctl_ops_snapdir is
 * built from; the registration code is outside this section -- confirm.
 */
static const fs_operation_def_t zfsctl_tops_snapdir[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_snapdir_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
	{ VOPNAME_RENAME,	{ .vop_rename = zfsctl_snapdir_rename }	},
	{ VOPNAME_RMDIR,	{ .vop_rmdir = zfsctl_snapdir_remove }	},
	{ VOPNAME_MKDIR,	{ .vop_mkdir = zfsctl_snapdir_mkdir }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir }	},
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_snapdir_lookup }	},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapdir_inactive } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid }	},
	{ NULL }
};
   1142 
/*
 * Vnode operation table for the shares directory node type.
 * getattr, readdir, lookup and fid map to the zfsctl_shares_*()
 * handlers; inactive uses the generic gfs_vop_inactive(); ioctl is
 * rejected with fs_inval.  No rename/rmdir/mkdir entries are provided.
 * The list is terminated by a { NULL } entry.
 *
 * NOTE(review): presumably this template is what zfsctl_ops_shares is
 * built from; the registration code is outside this section -- confirm.
 */
static const fs_operation_def_t zfsctl_tops_shares[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_shares_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = zfsctl_shares_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_shares_lookup }	},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_shares_fid } },
	{ NULL }
};
   1156 
   1157 /*
   1158  * pvp is the GFS vnode '.zfs/snapshot'.
   1159  *
   1160  * This creates a GFS node under '.zfs/snapshot' representing each
   1161  * snapshot.  This newly created GFS node is what we mount snapshot
   1162  * vfs_t's ontop of.
   1163  */
   1164 static vnode_t *
   1165 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
   1166 {
   1167 	vnode_t *vp;
   1168 	zfsctl_node_t *zcp;
   1169 
   1170 	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
   1171 	    zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
   1172 	zcp = vp->v_data;
   1173 	zcp->zc_id = objset;
   1174 
   1175 	return (vp);
   1176 }
   1177 
   1178 static void
   1179 zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   1180 {
   1181 	zfsctl_snapdir_t *sdp;
   1182 	zfs_snapentry_t *sep, *next;
   1183 	vnode_t *dvp;
   1184 
   1185 	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
   1186 	sdp = dvp->v_data;
   1187 
   1188 	mutex_enter(&sdp->sd_lock);
   1189 
   1190 	if (vp->v_count > 1) {
   1191 		mutex_exit(&sdp->sd_lock);
   1192 		return;
   1193 	}
   1194 	ASSERT(!vn_ismntpt(vp));
   1195 
   1196 	sep = avl_first(&sdp->sd_snaps);
   1197 	while (sep != NULL) {
   1198 		next = AVL_NEXT(&sdp->sd_snaps, sep);
   1199 
   1200 		if (sep->se_root == vp) {
   1201 			avl_remove(&sdp->sd_snaps, sep);
   1202 			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
   1203 			kmem_free(sep, sizeof (zfs_snapentry_t));
   1204 			break;
   1205 		}
   1206 		sep = next;
   1207 	}
   1208 	ASSERT(sep != NULL);
   1209 
   1210 	mutex_exit(&sdp->sd_lock);
   1211 	VN_RELE(dvp);
   1212 
   1213 	/*
   1214 	 * Dispose of the vnode for the snapshot mount point.
   1215 	 * This is safe to do because once this entry has been removed
   1216 	 * from the AVL tree, it can't be found again, so cannot become
   1217 	 * "active".  If we lookup the same name again we will end up
   1218 	 * creating a new vnode.
   1219 	 */
   1220 	gfs_vop_inactive(vp, cr, ct);
   1221 }
   1222 
   1223 
   1224 /*
   1225  * These VP's should never see the light of day.  They should always
   1226  * be covered.
   1227  */
   1228 static const fs_operation_def_t zfsctl_tops_snapshot[] = {
   1229 	VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive },
   1230 	NULL, NULL
   1231 };
   1232 
   1233 int
   1234 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
   1235 {
   1236 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1237 	vnode_t *dvp, *vp;
   1238 	zfsctl_snapdir_t *sdp;
   1239 	zfsctl_node_t *zcp;
   1240 	zfs_snapentry_t *sep;
   1241 	int error;
   1242 
   1243 	ASSERT(zfsvfs->z_ctldir != NULL);
   1244 	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
   1245 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
   1246 	if (error != 0)
   1247 		return (error);
   1248 	sdp = dvp->v_data;
   1249 
   1250 	mutex_enter(&sdp->sd_lock);
   1251 	sep = avl_first(&sdp->sd_snaps);
   1252 	while (sep != NULL) {
   1253 		vp = sep->se_root;
   1254 		zcp = vp->v_data;
   1255 		if (zcp->zc_id == objsetid)
   1256 			break;
   1257 
   1258 		sep = AVL_NEXT(&sdp->sd_snaps, sep);
   1259 	}
   1260 
   1261 	if (sep != NULL) {
   1262 		VN_HOLD(vp);
   1263 		/*
   1264 		 * Return the mounted root rather than the covered mount point.
   1265 		 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
   1266 		 * and returns the ZFS vnode mounted on top of the GFS node.
   1267 		 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
   1268 		 */
   1269 		error = traverse(&vp);
   1270 		if (error == 0) {
   1271 			if (vp == sep->se_root)
   1272 				error = EINVAL;
   1273 			else
   1274 				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
   1275 		}
   1276 		mutex_exit(&sdp->sd_lock);
   1277 		VN_RELE(vp);
   1278 	} else {
   1279 		error = EINVAL;
   1280 		mutex_exit(&sdp->sd_lock);
   1281 	}
   1282 
   1283 	VN_RELE(dvp);
   1284 
   1285 	return (error);
   1286 }
   1287 
   1288 /*
   1289  * Unmount any snapshots for the given filesystem.  This is called from
   1290  * zfs_umount() - if we have a ctldir, then go through and unmount all the
   1291  * snapshots.
   1292  */
   1293 int
   1294 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
   1295 {
   1296 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1297 	vnode_t *dvp;
   1298 	zfsctl_snapdir_t *sdp;
   1299 	zfs_snapentry_t *sep, *next;
   1300 	int error;
   1301 
   1302 	ASSERT(zfsvfs->z_ctldir != NULL);
   1303 	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
   1304 	    NULL, 0, NULL, cr, NULL, NULL, NULL);
   1305 	if (error != 0)
   1306 		return (error);
   1307 	sdp = dvp->v_data;
   1308 
   1309 	mutex_enter(&sdp->sd_lock);
   1310 
   1311 	sep = avl_first(&sdp->sd_snaps);
   1312 	while (sep != NULL) {
   1313 		next = AVL_NEXT(&sdp->sd_snaps, sep);
   1314 
   1315 		/*
   1316 		 * If this snapshot is not mounted, then it must
   1317 		 * have just been unmounted by somebody else, and
   1318 		 * will be cleaned up by zfsctl_snapdir_inactive().
   1319 		 */
   1320 		if (vn_ismntpt(sep->se_root)) {
   1321 			avl_remove(&sdp->sd_snaps, sep);
   1322 			error = zfsctl_unmount_snap(sep, fflags, cr);
   1323 			if (error) {
   1324 				avl_add(&sdp->sd_snaps, sep);
   1325 				break;
   1326 			}
   1327 		}
   1328 		sep = next;
   1329 	}
   1330 
   1331 	mutex_exit(&sdp->sd_lock);
   1332 	VN_RELE(dvp);
   1333 
   1334 	return (error);
   1335 }
   1336