OpenGrok

Cross Reference: zfs_ctldir.c
xref: /onnv/onnv-gate/usr/src/uts/common/fs/zfs/zfs_ctldir.c
Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
     23  */
     24 
     25 /*
     26  * ZFS control directory (a.k.a. ".zfs")
     27  *
     28  * This directory provides a common location for all ZFS meta-objects.
     29  * Currently, this is only the 'snapshot' directory, but this may expand in the
     30  * future.  The elements are built using the GFS primitives, as the hierarchy
     31  * does not actually exist on disk.
     32  *
     33  * For 'snapshot', we don't want to have all snapshots always mounted, because
     34  * this would take up a huge amount of space in /etc/mnttab.  We have three
     35  * types of objects:
     36  *
     37  * 	ctldir ------> snapshotdir -------> snapshot
     38  *                                             |
     39  *                                             |
     40  *                                             V
     41  *                                         mounted fs
     42  *
     43  * The 'snapshot' node contains just enough information to lookup '..' and act
     44  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
     45  * perform an automount of the underlying filesystem and return the
     46  * corresponding vnode.
     47  *
     48  * All mounts are handled automatically by the kernel, but unmounts are
     49  * (currently) handled from user land.  The main reason is that there is no
     50  * reliable way to auto-unmount the filesystem when it's "no longer in use".
     51  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
     52  * unmounts any snapshots within the snapshot directory.
     53  *
     54  * The '.zfs', '.zfs/snapshot', and all directories created under
     55  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
     56  * share the same vfs_t as the head filesystem (what '.zfs' lives under).
     57  *
     58  * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
     59  * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
     60  * However, vnodes within these mounted on file systems have their v_vfsp
     61  * fields set to the head filesystem to make NFS happy (see
     62  * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
     63  * so that it cannot be freed until all snapshots have been unmounted.
     64  */
     65 
     66 #include <fs/fs_subr.h>
     67 #include <sys/zfs_ctldir.h>
     68 #include <sys/zfs_ioctl.h>
     69 #include <sys/zfs_vfsops.h>
     70 #include <sys/vfs_opreg.h>
     71 #include <sys/gfs.h>
     72 #include <sys/stat.h>
     73 #include <sys/dmu.h>
     74 #include <sys/dsl_deleg.h>
     75 #include <sys/mount.h>
     76 #include <sys/sunddi.h>
     77 
     78 #include "zfs_namecheck.h"
     79 
     80 typedef struct zfsctl_node {
     81 	gfs_dir_t	zc_gfs_private;
     82 	uint64_t	zc_id;
     83 	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
     84 } zfsctl_node_t;
     85 
     86 typedef struct zfsctl_snapdir {
     87 	zfsctl_node_t	sd_node;
     88 	kmutex_t	sd_lock;
     89 	avl_tree_t	sd_snaps;
     90 } zfsctl_snapdir_t;
     91 
     92 typedef struct {
     93 	char		*se_name;
     94 	vnode_t		*se_root;
     95 	avl_node_t	se_node;
     96 } zfs_snapentry_t;
     97 
     98 static int
     99 snapentry_compare(const void *a, const void *b)
    100 {
    101 	const zfs_snapentry_t *sa = a;
    102 	const zfs_snapentry_t *sb = b;
    103 	int ret = strcmp(sa->se_name, sb->se_name);
    104 
    105 	if (ret < 0)
    106 		return (-1);
    107 	else if (ret > 0)
    108 		return (1);
    109 	else
    110 		return (0);
    111 }
    112 
    113 vnodeops_t *zfsctl_ops_root;
    114 vnodeops_t *zfsctl_ops_snapdir;
    115 vnodeops_t *zfsctl_ops_snapshot;
    116 vnodeops_t *zfsctl_ops_shares;
    117 vnodeops_t *zfsctl_ops_shares_dir;
    118 
    119 static const fs_operation_def_t zfsctl_tops_root[];
    120 static const fs_operation_def_t zfsctl_tops_snapdir[];
    121 static const fs_operation_def_t zfsctl_tops_snapshot[];
    122 static const fs_operation_def_t zfsctl_tops_shares[];
    123 
    124 static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
    125 static vnode_t *zfsctl_mknode_shares(vnode_t *);
    126 static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
    127 static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
    128 
    129 static gfs_opsvec_t zfsctl_opsvec[] = {
    130 	{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
    131 	{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
    132 	{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
    133 	{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
    134 	{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
    135 	{ NULL }
    136 };
    137 
    138 /*
    139  * Root directory elements.  We only have two entries
    140  * snapshot and shares.
    141  */
    142 static gfs_dirent_t zfsctl_root_entries[] = {
    143 	{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
    144 	{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
    145 	{ NULL }
    146 };
    147 
    148 /* include . and .. in the calculation */
    149 #define	NROOT_ENTRIES	((sizeof (zfsctl_root_entries) / \
    150     sizeof (gfs_dirent_t)) + 1)
    151 
    152 
    153 /*
    154  * Initialize the various GFS pieces we'll need to create and manipulate .zfs
    155  * directories.  This is called from the ZFS init routine, and initializes the
    156  * vnode ops vectors that we'll be using.
    157  */
    158 void
    159 zfsctl_init(void)
    160 {
    161 	VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
    162 }
    163 
    164 void
    165 zfsctl_fini(void)
    166 {
    167 	/*
    168 	 * Remove vfsctl vnode ops
    169 	 */
    170 	if (zfsctl_ops_root)
    171 		vn_freevnodeops(zfsctl_ops_root);
    172 	if (zfsctl_ops_snapdir)
    173 		vn_freevnodeops(zfsctl_ops_snapdir);
    174 	if (zfsctl_ops_snapshot)
    175 		vn_freevnodeops(zfsctl_ops_snapshot);
    176 	if (zfsctl_ops_shares)
    177 		vn_freevnodeops(zfsctl_ops_shares);
    178 	if (zfsctl_ops_shares_dir)
    179 		vn_freevnodeops(zfsctl_ops_shares_dir);
    180 
    181 	zfsctl_ops_root = NULL;
    182 	zfsctl_ops_snapdir = NULL;
    183 	zfsctl_ops_snapshot = NULL;
    184 	zfsctl_ops_shares = NULL;
    185 	zfsctl_ops_shares_dir = NULL;
    186 }
    187 
    188 boolean_t
    189 zfsctl_is_node(vnode_t *vp)
    190 {
    191 	return (vn_matchops(vp, zfsctl_ops_root) ||
    192 	    vn_matchops(vp, zfsctl_ops_snapdir) ||
    193 	    vn_matchops(vp, zfsctl_ops_snapshot) ||
    194 	    vn_matchops(vp, zfsctl_ops_shares) ||
    195 	    vn_matchops(vp, zfsctl_ops_shares_dir));
    196 
    197 }
    198 
    199 /*
    200  * Return the inode number associated with the 'snapshot' or
    201  * 'shares' directory.
    202  */
    203 /* ARGSUSED */
    204 static ino64_t
    205 zfsctl_root_inode_cb(vnode_t *vp, int index)
    206 {
    207 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    208 
    209 	ASSERT(index <= 2);
    210 
    211 	if (index == 0)
    212 		return (ZFSCTL_INO_SNAPDIR);
    213 
    214 	return (zfsvfs->z_shares_dir);
    215 }
    216 
    217 /*
    218  * Create the '.zfs' directory.  This directory is cached as part of the VFS
    219  * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
    220  * therefore checks against a vfs_count of 2 instead of 1.  This reference
    221  * is removed when the ctldir is destroyed in the unmount.
    222  */
    223 void
    224 zfsctl_create(zfsvfs_t *zfsvfs)
    225 {
    226 	vnode_t *vp, *rvp;
    227 	zfsctl_node_t *zcp;
    228 	uint64_t crtime[2];
    229 
    230 	ASSERT(zfsvfs->z_ctldir == NULL);
    231 
    232 	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
    233 	    zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
    234 	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
    235 	zcp = vp->v_data;
    236 	zcp->zc_id = ZFSCTL_INO_ROOT;
    237 
    238 	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
    239 	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
    240 	    &crtime, sizeof (crtime)));
    241 	ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
    242 	VN_RELE(rvp);
    243 
    244 	/*
    245 	 * We're only faking the fact that we have a root of a filesystem for
    246 	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
    247 	 * for us.
    248 	 */
    249 	vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
    250 
    251 	zfsvfs->z_ctldir = vp;
    252 }
    253 
    254 /*
    255  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
    256  * There might still be more references if we were force unmounted, but only
    257  * new zfs_inactive() calls can occur and they don't reference .zfs
    258  */
    259 void
    260 zfsctl_destroy(zfsvfs_t *zfsvfs)
    261 {
    262 	VN_RELE(zfsvfs->z_ctldir);
    263 	zfsvfs->z_ctldir = NULL;
    264 }
    265 
    266 /*
    267  * Given a root znode, retrieve the associated .zfs directory.
    268  * Add a hold to the vnode and return it.
    269  */
    270 vnode_t *
    271 zfsctl_root(znode_t *zp)
    272 {
    273 	ASSERT(zfs_has_ctldir(zp));
    274 	VN_HOLD(zp->z_zfsvfs->z_ctldir);
    275 	return (zp->z_zfsvfs->z_ctldir);
    276 }
    277 
    278 /*
    279  * Common open routine.  Disallow any write access.
    280  */
    281 /* ARGSUSED */
    282 static int
    283 zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
    284 {
    285 	if (flags & FWRITE)
    286 		return (EACCES);
    287 
    288 	return (0);
    289 }
    290 
    291 /*
    292  * Common close routine.  Nothing to do here.
    293  */
    294 /* ARGSUSED */
    295 static int
    296 zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    297     cred_t *cr, caller_context_t *ct)
    298 {
    299 	return (0);
    300 }
    301 
    302 /*
    303  * Common access routine.  Disallow writes.
    304  */
    305 /* ARGSUSED */
    306 static int
    307 zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    308     caller_context_t *ct)
    309 {
    310 	if (flags & V_ACE_MASK) {
    311 		if (mode & ACE_ALL_WRITE_PERMS)
    312 			return (EACCES);
    313 	} else {
    314 		if (mode & VWRITE)
    315 			return (EACCES);
    316 	}
    317 
    318 	return (0);
    319 }
    320 
    321 /*
    322  * Common getattr function.  Fill in basic information.
    323  */
    324 static void
    325 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
    326 {
    327 	timestruc_t	now;
    328 
    329 	vap->va_uid = 0;
    330 	vap->va_gid = 0;
    331 	vap->va_rdev = 0;
    332 	/*
    333 	 * We are a purely virtual object, so we have no
    334 	 * blocksize or allocated blocks.
    335 	 */
    336 	vap->va_blksize = 0;
    337 	vap->va_nblocks = 0;
    338 	vap->va_seq = 0;
    339 	vap->va_fsid = vp->v_vfsp->vfs_dev;
    340 	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
    341 	    S_IROTH | S_IXOTH;
    342 	vap->va_type = VDIR;
    343 	/*
    344 	 * We live in the now (for atime).
    345 	 */
    346 	gethrestime(&now);
    347 	vap->va_atime = now;
    348 }
    349 
    350 /*ARGSUSED*/
    351 static int
    352 zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
    353 {
    354 	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
    355 	zfsctl_node_t	*zcp = vp->v_data;
    356 	uint64_t	object = zcp->zc_id;
    357 	zfid_short_t	*zfid;
    358 	int		i;
    359 
    360 	ZFS_ENTER(zfsvfs);
    361 
    362 	if (fidp->fid_len < SHORT_FID_LEN) {
    363 		fidp->fid_len = SHORT_FID_LEN;
    364 		ZFS_EXIT(zfsvfs);
    365 		return (ENOSPC);
    366 	}
    367 
    368 	zfid = (zfid_short_t *)fidp;
    369 
    370 	zfid->zf_len = SHORT_FID_LEN;
    371 
    372 	for (i = 0; i < sizeof (zfid->zf_object); i++)
    373 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
    374 
    375 	/* .zfs znodes always have a generation number of 0 */
    376 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
    377 		zfid->zf_gen[i] = 0;
    378 
    379 	ZFS_EXIT(zfsvfs);
    380 	return (0);
    381 }
    382 
    383 
    384 /*ARGSUSED*/
    385 static int
    386 zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
    387 {
    388 	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
    389 	znode_t		*dzp;
    390 	int		error;
    391 
    392 	ZFS_ENTER(zfsvfs);
    393 
    394 	if (zfsvfs->z_shares_dir == 0) {
    395 		ZFS_EXIT(zfsvfs);
    396 		return (ENOTSUP);
    397 	}
    398 
    399 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
    400 		error = VOP_FID(ZTOV(dzp), fidp, ct);
    401 		VN_RELE(ZTOV(dzp));
    402 	}
    403 
    404 	ZFS_EXIT(zfsvfs);
    405 	return (error);
    406 }
/*
 * .zfs inode namespace
 *
 * Every file and directory inside the .zfs pseudo-filesystem needs a
 * unique inode number.  The scheme is:
 *
 * 	ENTRY			ZFSCTL_INODE
 * 	.zfs			1
 * 	.zfs/snapshot		2
 * 	.zfs/snapshot/<snap>	objectid(snap)
 *
 * The 'shares' entry reports the filesystem's z_shares_dir object instead
 * (see zfsctl_root_inode_cb()).
 */

/* A snapshot's inode number is simply its object id. */
#define	ZFSCTL_INO_SNAP(id)	(id)
    420 
    421 /*
    422  * Get root directory attributes.
    423  */
    424 /* ARGSUSED */
    425 static int
    426 zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    427     caller_context_t *ct)
    428 {
    429 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    430 	zfsctl_node_t *zcp = vp->v_data;
    431 
    432 	ZFS_ENTER(zfsvfs);
    433 	vap->va_nodeid = ZFSCTL_INO_ROOT;
    434 	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
    435 	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
    436 
    437 	zfsctl_common_getattr(vp, vap);
    438 	ZFS_EXIT(zfsvfs);
    439 
    440 	return (0);
    441 }
    442 
    443 /*
    444  * Special case the handling of "..".
    445  */
    446 /* ARGSUSED */
    447 int
    448 zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    449     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    450     int *direntflags, pathname_t *realpnp)
    451 {
    452 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    453 	int err;
    454 
    455 	/*
    456 	 * No extended attributes allowed under .zfs
    457 	 */
    458 	if (flags & LOOKUP_XATTR)
    459 		return (EINVAL);
    460 
    461 	ZFS_ENTER(zfsvfs);
    462 
    463 	if (strcmp(nm, "..") == 0) {
    464 		err = VFS_ROOT(dvp->v_vfsp, vpp);
    465 	} else {
    466 		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
    467 		    cr, ct, direntflags, realpnp);
    468 	}
    469 
    470 	ZFS_EXIT(zfsvfs);
    471 
    472 	return (err);
    473 }
    474 
    475 static int
    476 zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    477     caller_context_t *ct)
    478 {
    479 	/*
    480 	 * We only care about ACL_ENABLED so that libsec can
    481 	 * display ACL correctly and not default to POSIX draft.
    482 	 */
    483 	if (cmd == _PC_ACL_ENABLED) {
    484 		*valp = _ACL_ACE_ENABLED;
    485 		return (0);
    486 	}
    487 
    488 	return (fs_pathconf(vp, cmd, valp, cr, ct));
    489 }
    490 
    491 static const fs_operation_def_t zfsctl_tops_root[] = {
    492 	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
    493 	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
    494 	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
    495 	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr }	},
    496 	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
    497 	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } 	},
    498 	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup }	},
    499 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
    500 	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
    501 	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf }	},
    502 	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid	}	},
    503 	{ NULL }
    504 };
    505 
    506 static int
    507 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
    508 {
    509 	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
    510 
    511 	if (snapshot_namecheck(name, NULL, NULL) != 0)
    512 		return (EILSEQ);
    513 	dmu_objset_name(os, zname);
    514 	if (strlen(zname) + 1 + strlen(name) >= len)
    515 		return (ENAMETOOLONG);
    516 	(void) strcat(zname, "@");
    517 	(void) strcat(zname, name);
    518 	return (0);
    519 }
    520 
    521 static int
    522 zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
    523 {
    524 	vnode_t *svp = sep->se_root;
    525 	int error;
    526 
    527 	ASSERT(vn_ismntpt(svp));
    528 
    529 	/* this will be dropped by dounmount() */
    530 	if ((error = vn_vfswlock(svp)) != 0)
    531 		return (error);
    532 
    533 	VN_HOLD(svp);
    534 	error = dounmount(vn_mountedvfs(svp), fflags, cr);
    535 	if (error) {
    536 		VN_RELE(svp);
    537 		return (error);
    538 	}
    539 
    540 	/*
    541 	 * We can't use VN_RELE(), as that will try to invoke
    542 	 * zfsctl_snapdir_inactive(), which would cause us to destroy
    543 	 * the sd_lock mutex held by our caller.
    544 	 */
    545 	ASSERT(svp->v_count == 1);
    546 	gfs_vop_inactive(svp, cr, NULL);
    547 
    548 	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
    549 	kmem_free(sep, sizeof (zfs_snapentry_t));
    550 
    551 	return (0);
    552 }
    553 
    554 static void
    555 zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
    556 {
    557 	avl_index_t where;
    558 	vfs_t *vfsp;
    559 	refstr_t *pathref;
    560 	char newpath[MAXNAMELEN];
    561 	char *tail;
    562 
    563 	ASSERT(MUTEX_HELD(&sdp->sd_lock));
    564 	ASSERT(sep != NULL);
    565 
    566 	vfsp = vn_mountedvfs(sep->se_root);
    567 	ASSERT(vfsp != NULL);
    568 
    569 	vfs_lock_wait(vfsp);
    570 
    571 	/*
    572 	 * Change the name in the AVL tree.
    573 	 */
    574 	avl_remove(&sdp->sd_snaps, sep);
    575 	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
    576 	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
    577 	(void) strcpy(sep->se_name, nm);
    578 	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
    579 	avl_insert(&sdp->sd_snaps, sep, where);
    580 
    581 	/*
    582 	 * Change the current mountpoint info:
    583 	 * 	- update the tail of the mntpoint path
    584 	 *	- update the tail of the resource path
    585 	 */
    586 	pathref = vfs_getmntpoint(vfsp);
    587 	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
    588 	VERIFY((tail = strrchr(newpath, '/')) != NULL);
    589 	*(tail+1) = '\0';
    590 	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
    591 	(void) strcat(newpath, nm);
    592 	refstr_rele(pathref);
    593 	vfs_setmntpoint(vfsp, newpath, 0);
    594 
    595 	pathref = vfs_getresource(vfsp);
    596 	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
    597 	VERIFY((tail = strrchr(newpath, '@')) != NULL);
    598 	*(tail+1) = '\0';
    599 	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
    600 	(void) strcat(newpath, nm);
    601 	refstr_rele(pathref);
    602 	vfs_setresource(vfsp, newpath, 0);
    603 
    604 	vfs_unlock(vfsp);
    605 }
    606 
    607 /*ARGSUSED*/
    608 static int
    609 zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    610     cred_t *cr, caller_context_t *ct, int flags)
    611 {
    612 	zfsctl_snapdir_t *sdp = sdvp->v_data;
    613 	zfs_snapentry_t search, *sep;
    614 	zfsvfs_t *zfsvfs;
    615 	avl_index_t where;
    616 	char from[MAXNAMELEN], to[MAXNAMELEN];
    617 	char real[MAXNAMELEN];
    618 	int err;
    619 
    620 	zfsvfs = sdvp->v_vfsp->vfs_data;
    621 	ZFS_ENTER(zfsvfs);
    622 
    623 	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
    624 		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
    625 		    MAXNAMELEN, NULL);
    626 		if (err == 0) {
    627 			snm = real;
    628 		} else if (err != ENOTSUP) {
    629 			ZFS_EXIT(zfsvfs);
    630 			return (err);
    631 		}
    632 	}
    633 
    634 	ZFS_EXIT(zfsvfs);
    635 
    636 	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
    637 	if (!err)
    638 		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
    639 	if (!err)
    640 		err = zfs_secpolicy_rename_perms(from, to, cr);
    641 	if (err)
    642 		return (err);
    643 
    644 	/*
    645 	 * Cannot move snapshots out of the snapdir.
    646 	 */
    647 	if (sdvp != tdvp)
    648 		return (EINVAL);
    649 
    650 	if (strcmp(snm, tnm) == 0)
    651 		return (0);
    652 
    653 	mutex_enter(&sdp->sd_lock);
    654 
    655 	search.se_name = (char *)snm;
    656 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
    657 		mutex_exit(&sdp->sd_lock);
    658 		return (ENOENT);
    659 	}
    660 
    661 	err = dmu_objset_rename(from, to, B_FALSE);
    662 	if (err == 0)
    663 		zfsctl_rename_snap(sdp, sep, tnm);
    664 
    665 	mutex_exit(&sdp->sd_lock);
    666 
    667 	return (err);
    668 }
    669 
    670 /* ARGSUSED */
    671 static int
    672 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    673     caller_context_t *ct, int flags)
    674 {
    675 	zfsctl_snapdir_t *sdp = dvp->v_data;
    676 	zfs_snapentry_t *sep;
    677 	zfs_snapentry_t search;
    678 	zfsvfs_t *zfsvfs;
    679 	char snapname[MAXNAMELEN];
    680 	char real[MAXNAMELEN];
    681 	int err;
    682 
    683 	zfsvfs = dvp->v_vfsp->vfs_data;
    684 	ZFS_ENTER(zfsvfs);
    685 
    686 	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
    687 
    688 		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
    689 		    MAXNAMELEN, NULL);
    690 		if (err == 0) {
    691 			name = real;
    692 		} else if (err != ENOTSUP) {
    693 			ZFS_EXIT(zfsvfs);
    694 			return (err);
    695 		}
    696 	}
    697 
    698 	ZFS_EXIT(zfsvfs);
    699 
    700 	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
    701 	if (!err)
    702 		err = zfs_secpolicy_destroy_perms(snapname, cr);
    703 	if (err)
    704 		return (err);
    705 
    706 	mutex_enter(&sdp->sd_lock);
    707 
    708 	search.se_name = name;
    709 	sep = avl_find(&sdp->sd_snaps, &search, NULL);
    710 	if (sep) {
    711 		avl_remove(&sdp->sd_snaps, sep);
    712 		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
    713 		if (err)
    714 			avl_add(&sdp->sd_snaps, sep);
    715 		else
    716 			err = dmu_objset_destroy(snapname, B_FALSE);
    717 	} else {
    718 		err = ENOENT;
    719 	}
    720 
    721 	mutex_exit(&sdp->sd_lock);
    722 
    723 	return (err);
    724 }
    725 
    726 /*
    727  * This creates a snapshot under '.zfs/snapshot'.
    728  */
    729 /* ARGSUSED */
    730 static int
    731 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,
    732     cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
    733 {
    734 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    735 	char name[MAXNAMELEN];
    736 	int err;
    737 	static enum symfollow follow = NO_FOLLOW;
    738 	static enum uio_seg seg = UIO_SYSSPACE;
    739 
    740 	if (snapshot_namecheck(dirname, NULL, NULL) != 0)
    741 		return (EILSEQ);
    742 
    743 	dmu_objset_name(zfsvfs->z_os, name);
    744 
    745 	*vpp = NULL;
    746 
    747 	err = zfs_secpolicy_snapshot_perms(name, cr);
    748 	if (err)
    749 		return (err);
    750 
    751 	if (err == 0) {
    752 		err = dmu_objset_snapshot(name, dirname, NULL, NULL,
    753 		    B_FALSE, B_FALSE, -1);
    754 		if (err)
    755 			return (err);
    756 		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
    757 	}
    758 
    759 	return (err);
    760 }
    761 
    762 /*
    763  * Lookup entry point for the 'snapshot' directory.  Try to open the
    764  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
    765  * Perform a mount of the associated dataset on top of the vnode.
    766  */
    767 /* ARGSUSED */
    768 static int
    769 zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    770     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    771     int *direntflags, pathname_t *realpnp)
    772 {
    773 	zfsctl_snapdir_t *sdp = dvp->v_data;
    774 	objset_t *snap;
    775 	char snapname[MAXNAMELEN];
    776 	char real[MAXNAMELEN];
    777 	char *mountpoint;
    778 	zfs_snapentry_t *sep, search;
    779 	struct mounta margs;
    780 	vfs_t *vfsp;
    781 	size_t mountpoint_len;
    782 	avl_index_t where;
    783 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    784 	int err;
    785 
    786 	/*
    787 	 * No extended attributes allowed under .zfs
    788 	 */
    789 	if (flags & LOOKUP_XATTR)
    790 		return (EINVAL);
    791 
    792 	ASSERT(dvp->v_type == VDIR);
    793 
    794 	/*
    795 	 * If we get a recursive call, that means we got called
    796 	 * from the domount() code while it was trying to look up the
    797 	 * spec (which looks like a local path for zfs).  We need to
    798 	 * add some flag to domount() to tell it not to do this lookup.
    799 	 */
    800 	if (MUTEX_HELD(&sdp->sd_lock))
    801 		return (ENOENT);
    802 
    803 	ZFS_ENTER(zfsvfs);
    804 
    805 	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
    806 		ZFS_EXIT(zfsvfs);
    807 		return (0);
    808 	}
    809 
    810 	if (flags & FIGNORECASE) {
    811 		boolean_t conflict = B_FALSE;
    812 
    813 		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
    814 		    MAXNAMELEN, &conflict);
    815 		if (err == 0) {
    816 			nm = real;
    817 		} else if (err != ENOTSUP) {
    818 			ZFS_EXIT(zfsvfs);
    819 			return (err);
    820 		}
    821 		if (realpnp)
    822 			(void) strlcpy(realpnp->pn_buf, nm,
    823 			    realpnp->pn_bufsize);
    824 		if (conflict && direntflags)
    825 			*direntflags = ED_CASE_CONFLICT;
    826 	}
    827 
    828 	mutex_enter(&sdp->sd_lock);
    829 	search.se_name = (char *)nm;
    830 	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
    831 		*vpp = sep->se_root;
    832 		VN_HOLD(*vpp);
    833 		err = traverse(vpp);
    834 		if (err) {
    835 			VN_RELE(*vpp);
    836 			*vpp = NULL;
    837 		} else if (*vpp == sep->se_root) {
    838 			/*
    839 			 * The snapshot was unmounted behind our backs,
    840 			 * try to remount it.
    841 			 */
    842 			goto domount;
    843 		} else {
    844 			/*
    845 			 * VROOT was set during the traverse call.  We need
    846 			 * to clear it since we're pretending to be part
    847 			 * of our parent's vfs.
    848 			 */
    849 			(*vpp)->v_flag &= ~VROOT;
    850 		}
    851 		mutex_exit(&sdp->sd_lock);
    852 		ZFS_EXIT(zfsvfs);
    853 		return (err);
    854 	}
    855 
    856 	/*
    857 	 * The requested snapshot is not currently mounted, look it up.
    858 	 */
    859 	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
    860 	if (err) {
    861 		mutex_exit(&sdp->sd_lock);
    862 		ZFS_EXIT(zfsvfs);
    863 		/*
    864 		 * handle "ls *" or "?" in a graceful manner,
    865 		 * forcing EILSEQ to ENOENT.
    866 		 * Since shell ultimately passes "*" or "?" as name to lookup
    867 		 */
    868 		return (err == EILSEQ ? ENOENT : err);
    869 	}
    870 	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
    871 		mutex_exit(&sdp->sd_lock);
    872 		ZFS_EXIT(zfsvfs);
    873 		return (ENOENT);
    874 	}
    875 
    876 	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
    877 	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
    878 	(void) strcpy(sep->se_name, nm);
    879 	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
    880 	avl_insert(&sdp->sd_snaps, sep, where);
    881 
    882 	dmu_objset_rele(snap, FTAG);
    883 domount:
    884 	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
    885 	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
    886 	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
    887 	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
    888 	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
    889 
    890 	margs.spec = snapname;
    891 	margs.dir = mountpoint;
    892 	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
    893 	margs.fstype = "zfs";
    894 	margs.dataptr = NULL;
    895 	margs.datalen = 0;
    896 	margs.optptr = NULL;
    897 	margs.optlen = 0;
    898 
    899 	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
    900 	kmem_free(mountpoint, mountpoint_len);
    901 
    902 	if (err == 0) {
    903 		/*
    904 		 * Return the mounted root rather than the covered mount point.
    905 		 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
    906 		 * the ZFS vnode mounted on top of the GFS node.  This ZFS
    907 		 * vnode is the root of the newly created vfsp.
    908 		 */
    909 		VFS_RELE(vfsp);
    910 		err = traverse(vpp);
    911 	}
    912 
    913 	if (err == 0) {
    914 		/*
    915 		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
    916 		 *
    917 		 * This is where we lie about our v_vfsp in order to
    918 		 * make .zfs/snapshot/<snapname> accessible over NFS
    919 		 * without requiring manual mounts of <snapname>.
    920 		 */
    921 		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
    922 		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
    923 		(*vpp)->v_vfsp = zfsvfs->z_vfs;
    924 		(*vpp)->v_flag &= ~VROOT;
    925 	}
    926 	mutex_exit(&sdp->sd_lock);
    927 	ZFS_EXIT(zfsvfs);
    928 
    929 	/*
    930 	 * If we had an error, drop our hold on the vnode and
    931 	 * zfsctl_snapshot_inactive() will clean up.
    932 	 */
    933 	if (err) {
    934 		VN_RELE(*vpp);
    935 		*vpp = NULL;
    936 	}
    937 	return (err);
    938 }
    939 
    940 /* ARGSUSED */
    941 static int
    942 zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    943     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    944     int *direntflags, pathname_t *realpnp)
    945 {
    946 	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
    947 	znode_t *dzp;
    948 	int error;
    949 
    950 	ZFS_ENTER(zfsvfs);
    951 
    952 	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
    953 		ZFS_EXIT(zfsvfs);
    954 		return (0);
    955 	}
    956 
    957 	if (zfsvfs->z_shares_dir == 0) {
    958 		ZFS_EXIT(zfsvfs);
    959 		return (ENOTSUP);
    960 	}
    961 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
    962 		error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
    963 		    flags, rdir, cr, ct, direntflags, realpnp);
    964 
    965 	VN_RELE(ZTOV(dzp));
    966 	ZFS_EXIT(zfsvfs);
    967 
    968 	return (error);
    969 }
    970 
    971 /* ARGSUSED */
    972 static int
    973 zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    974     offset_t *offp, offset_t *nextp, void *data, int flags)
    975 {
    976 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
    977 	char snapname[MAXNAMELEN];
    978 	uint64_t id, cookie;
    979 	boolean_t case_conflict;
    980 	int error;
    981 
    982 	ZFS_ENTER(zfsvfs);
    983 
    984 	cookie = *offp;
    985 	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
    986 	    &cookie, &case_conflict);
    987 	if (error) {
    988 		ZFS_EXIT(zfsvfs);
    989 		if (error == ENOENT) {
    990 			*eofp = 1;
    991 			return (0);
    992 		}
    993 		return (error);
    994 	}
    995 
    996 	if (flags & V_RDDIR_ENTFLAGS) {
    997 		edirent_t *eodp = dp;
    998 
    999 		(void) strcpy(eodp->ed_name, snapname);
   1000 		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
   1001 		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
   1002 	} else {
   1003 		struct dirent64 *odp = dp;
   1004 
   1005 		(void) strcpy(odp->d_name, snapname);
   1006 		odp->d_ino = ZFSCTL_INO_SNAP(id);
   1007 	}
   1008 	*nextp = cookie;
   1009 
   1010 	ZFS_EXIT(zfsvfs);
   1011 
   1012 	return (0);
   1013 }
   1014 
   1015 /* ARGSUSED */
   1016 static int
   1017 zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
   1018     caller_context_t *ct, int flags)
   1019 {
   1020 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1021 	znode_t *dzp;
   1022 	int error;
   1023 
   1024 	ZFS_ENTER(zfsvfs);
   1025 
   1026 	if (zfsvfs->z_shares_dir == 0) {
   1027 		ZFS_EXIT(zfsvfs);
   1028 		return (ENOTSUP);
   1029 	}
   1030 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
   1031 		error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
   1032 		VN_RELE(ZTOV(dzp));
   1033 	} else {
   1034 		*eofp = 1;
   1035 		error = ENOENT;
   1036 	}
   1037 
   1038 	ZFS_EXIT(zfsvfs);
   1039 	return (error);
   1040 }
   1041 
   1042 /*
   1043  * pvp is the '.zfs' directory (zfsctl_node_t).
   1044  * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
   1045  *
   1046  * This function is the callback to create a GFS vnode for '.zfs/snapshot'
   1047  * when a lookup is performed on .zfs for "snapshot".
   1048  */
   1049 vnode_t *
   1050 zfsctl_mknode_snapdir(vnode_t *pvp)
   1051 {
   1052 	vnode_t *vp;
   1053 	zfsctl_snapdir_t *sdp;
   1054 
   1055 	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
   1056 	    zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
   1057 	    zfsctl_snapdir_readdir_cb, NULL);
   1058 	sdp = vp->v_data;
   1059 	sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
   1060 	sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
   1061 	mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
   1062 	avl_create(&sdp->sd_snaps, snapentry_compare,
   1063 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
   1064 	return (vp);
   1065 }
   1066 
   1067 vnode_t *
   1068 zfsctl_mknode_shares(vnode_t *pvp)
   1069 {
   1070 	vnode_t *vp;
   1071 	zfsctl_node_t *sdp;
   1072 
   1073 	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
   1074 	    zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
   1075 	    NULL, NULL);
   1076 	sdp = vp->v_data;
   1077 	sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
   1078 	return (vp);
   1079 
   1080 }
   1081 
   1082 /* ARGSUSED */
   1083 static int
   1084 zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   1085     caller_context_t *ct)
   1086 {
   1087 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1088 	znode_t *dzp;
   1089 	int error;
   1090 
   1091 	ZFS_ENTER(zfsvfs);
   1092 	if (zfsvfs->z_shares_dir == 0) {
   1093 		ZFS_EXIT(zfsvfs);
   1094 		return (ENOTSUP);
   1095 	}
   1096 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
   1097 		error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
   1098 		VN_RELE(ZTOV(dzp));
   1099 	}
   1100 	ZFS_EXIT(zfsvfs);
   1101 	return (error);
   1102 
   1103 
   1104 }
   1105 
   1106 /* ARGSUSED */
   1107 static int
   1108 zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   1109     caller_context_t *ct)
   1110 {
   1111 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
   1112 	zfsctl_snapdir_t *sdp = vp->v_data;
   1113 
   1114 	ZFS_ENTER(zfsvfs);
   1115 	zfsctl_common_getattr(vp, vap);
   1116 	vap->va_nodeid = gfs_file_inode(vp);
   1117 	vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
   1118 	vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
   1119 	ZFS_EXIT(zfsvfs);
   1120 
   1121 	return (0);
   1122 }
   1123 
   1124 /* ARGSUSED */
   1125 static void
   1126 zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   1127 {
   1128 	zfsctl_snapdir_t *sdp = vp->v_data;
   1129 	void *private;
   1130 
   1131 	private = gfs_dir_inactive(vp);
   1132 	if (private != NULL) {
   1133 		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
   1134 		mutex_destroy(&sdp->sd_lock);
   1135 		avl_destroy(&sdp->sd_snaps);
   1136 		kmem_free(private, sizeof (zfsctl_snapdir_t));
   1137 	}
   1138 }
   1139 
   1140 static const fs_operation_def_t zfsctl_tops_snapdir[] = {
   1141 	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
   1142 	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
   1143 	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
   1144 	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_snapdir_getattr } },
   1145 	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
   1146 	{ VOPNAME_RENAME,	{ .vop_rename = zfsctl_snapdir_rename }	},
   1147 	{ VOPNAME_RMDIR,	{ .vop_rmdir = zfsctl_snapdir_remove }	},
   1148 	{ VOPNAME_MKDIR,	{ .vop_mkdir = zfsctl_snapdir_mkdir }	},
   1149 	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir }	},
   1150 	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_snapdir_lookup }	},
   1151 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
   1152 	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapdir_inactive } },
   1153 	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid }	},
   1154 	{ NULL }
   1155 };
   1156 
   1157 static const fs_operation_def_t zfsctl_tops_shares[] = {
   1158 	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
   1159 	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
   1160 	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
   1161 	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_shares_getattr } },
   1162 	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
   1163 	{ VOPNAME_READDIR,	{ .vop_readdir = zfsctl_shares_readdir } },
   1164 	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_shares_lookup }	},
   1165 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
   1166 	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
   1167 	{ VOPNAME_FID,		{ .vop_fid = zfsctl_shares_fid } },
   1168 	{ NULL }
   1169 };
   1170 
   1171 /*
   1172  * pvp is the GFS vnode '.zfs/snapshot'.
   1173  *
   1174  * This creates a GFS node under '.zfs/snapshot' representing each
   1175  * snapshot.  This newly created GFS node is what we mount snapshot
   1176  * vfs_t's ontop of.
   1177  */
   1178 static vnode_t *
   1179 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
   1180 {
   1181 	vnode_t *vp;
   1182 	zfsctl_node_t *zcp;
   1183 
   1184 	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
   1185 	    zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
   1186 	zcp = vp->v_data;
   1187 	zcp->zc_id = objset;
   1188 
   1189 	return (vp);
   1190 }
   1191 
   1192 static void
   1193 zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   1194 {
   1195 	zfsctl_snapdir_t *sdp;
   1196 	zfs_snapentry_t *sep, *next;
   1197 	vnode_t *dvp;
   1198 
   1199 	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
   1200 	sdp = dvp->v_data;
   1201 
   1202 	mutex_enter(&sdp->sd_lock);
   1203 
   1204 	if (vp->v_count > 1) {
   1205 		mutex_exit(&sdp->sd_lock);
   1206 		return;
   1207 	}
   1208 	ASSERT(!vn_ismntpt(vp));
   1209 
   1210 	sep = avl_first(&sdp->sd_snaps);
   1211 	while (sep != NULL) {
   1212 		next = AVL_NEXT(&sdp->sd_snaps, sep);
   1213 
   1214 		if (sep->se_root == vp) {
   1215 			avl_remove(&sdp->sd_snaps, sep);
   1216 			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
   1217 			kmem_free(sep, sizeof (zfs_snapentry_t));
   1218 			break;
   1219 		}
   1220 		sep = next;
   1221 	}
   1222 	ASSERT(sep != NULL);
   1223 
   1224 	mutex_exit(&sdp->sd_lock);
   1225 	VN_RELE(dvp);
   1226 
   1227 	/*
   1228 	 * Dispose of the vnode for the snapshot mount point.
   1229 	 * This is safe to do because once this entry has been removed
   1230 	 * from the AVL tree, it can't be found again, so cannot become
   1231 	 * "active".  If we lookup the same name again we will end up
   1232 	 * creating a new vnode.
   1233 	 */
   1234 	gfs_vop_inactive(vp, cr, ct);
   1235 }
   1236 
   1237 
   1238 /*
   1239  * These VP's should never see the light of day.  They should always
   1240  * be covered.
   1241  */
   1242 static const fs_operation_def_t zfsctl_tops_snapshot[] = {
   1243 	VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive },
   1244 	NULL, NULL
   1245 };
   1246 
   1247 int
   1248 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
   1249 {
   1250 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1251 	vnode_t *dvp, *vp;
   1252 	zfsctl_snapdir_t *sdp;
   1253 	zfsctl_node_t *zcp;
   1254 	zfs_snapentry_t *sep;
   1255 	int error;
   1256 
   1257 	ASSERT(zfsvfs->z_ctldir != NULL);
   1258 	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
   1259 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
   1260 	if (error != 0)
   1261 		return (error);
   1262 	sdp = dvp->v_data;
   1263 
   1264 	mutex_enter(&sdp->sd_lock);
   1265 	sep = avl_first(&sdp->sd_snaps);
   1266 	while (sep != NULL) {
   1267 		vp = sep->se_root;
   1268 		zcp = vp->v_data;
   1269 		if (zcp->zc_id == objsetid)
   1270 			break;
   1271 
   1272 		sep = AVL_NEXT(&sdp->sd_snaps, sep);
   1273 	}
   1274 
   1275 	if (sep != NULL) {
   1276 		VN_HOLD(vp);
   1277 		/*
   1278 		 * Return the mounted root rather than the covered mount point.
   1279 		 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
   1280 		 * and returns the ZFS vnode mounted on top of the GFS node.
   1281 		 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
   1282 		 */
   1283 		error = traverse(&vp);
   1284 		if (error == 0) {
   1285 			if (vp == sep->se_root)
   1286 				error = EINVAL;
   1287 			else
   1288 				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
   1289 		}
   1290 		mutex_exit(&sdp->sd_lock);
   1291 		VN_RELE(vp);
   1292 	} else {
   1293 		error = EINVAL;
   1294 		mutex_exit(&sdp->sd_lock);
   1295 	}
   1296 
   1297 	VN_RELE(dvp);
   1298 
   1299 	return (error);
   1300 }
   1301 
   1302 /*
   1303  * Unmount any snapshots for the given filesystem.  This is called from
   1304  * zfs_umount() - if we have a ctldir, then go through and unmount all the
   1305  * snapshots.
   1306  */
   1307 int
   1308 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
   1309 {
   1310 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
   1311 	vnode_t *dvp;
   1312 	zfsctl_snapdir_t *sdp;
   1313 	zfs_snapentry_t *sep, *next;
   1314 	int error;
   1315 
   1316 	ASSERT(zfsvfs->z_ctldir != NULL);
   1317 	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
   1318 	    NULL, 0, NULL, cr, NULL, NULL, NULL);
   1319 	if (error != 0)
   1320 		return (error);
   1321 	sdp = dvp->v_data;
   1322 
   1323 	mutex_enter(&sdp->sd_lock);
   1324 
   1325 	sep = avl_first(&sdp->sd_snaps);
   1326 	while (sep != NULL) {
   1327 		next = AVL_NEXT(&sdp->sd_snaps, sep);
   1328 
   1329 		/*
   1330 		 * If this snapshot is not mounted, then it must
   1331 		 * have just been unmounted by somebody else, and
   1332 		 * will be cleaned up by zfsctl_snapdir_inactive().
   1333 		 */
   1334 		if (vn_ismntpt(sep->se_root)) {
   1335 			avl_remove(&sdp->sd_snaps, sep);
   1336 			error = zfsctl_unmount_snap(sep, fflags, cr);
   1337 			if (error) {
   1338 				avl_add(&sdp->sd_snaps, sep);
   1339 				break;
   1340 			}
   1341 		}
   1342 		sep = next;
   1343 	}
   1344 
   1345 	mutex_exit(&sdp->sd_lock);
   1346 	VN_RELE(dvp);
   1347 
   1348 	return (error);
   1349 }
   1350