Home | History | Annotate | Download | only in dev
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /* vnode ops for the /dev/zvol directory */
     27 
     28 #include <sys/types.h>
     29 #include <sys/param.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/ddi.h>
     32 #include <sys/sunndi.h>
     33 #include <sys/sunldi.h>
     34 #include <fs/fs_subr.h>
     35 #include <sys/fs/dv_node.h>
     36 #include <sys/fs/sdev_impl.h>
     37 #include <sys/zfs_ioctl.h>
     38 #include <sys/policy.h>
     39 #include <sys/stat.h>
     40 #include <sys/vfs_opreg.h>
     41 
/* vnode ops vector for /dev/zvol; presumably built from devzvol_vnodeops_tbl */
struct vnodeops	*devzvol_vnodeops;
/* generation cookie from the last successful ZFS_IOC_POOL_CONFIGS ioctl */
static uint64_t devzvol_gen = 0;
/* address of the cached packed pool-config nvlist (0 when none is cached) */
static uint64_t devzvol_zclist;
/* allocation size, in bytes, of the buffer referenced by devzvol_zclist */
static size_t devzvol_zclist_size;
/* LDI identity and handle used for the layered open of /dev/zfs */
static ldi_ident_t devzvol_li;
static ldi_handle_t devzvol_lh;
/* protects the /dev/zfs handle state and the cached pool list above */
static kmutex_t devzvol_mtx;
/* B_TRUE while devzvol_lh refers to an open /dev/zfs */
static boolean_t devzvol_isopen;
     50 
     51 /*
     52  * we need to use ddi_mod* since fs/dev gets loaded early on in
     53  * startup(), and linking fs/dev to fs/zfs would drag in a lot of
     54  * other stuff (like drv/random) before the rest of the system is
     55  * ready to go
     56  */
/* handle for the fs/zfs module; NULL whenever the module is not held open */
ddi_modhandle_t zfs_mod;
/* resolved address of zvol_create_minor(); NULL until devzvol_open_zfs() */
int (*szcm)(char *);
/* resolved address of zvol_name2minor(); NULL until devzvol_open_zfs() */
int (*szn2m)(char *, minor_t *);
     60 
     61 int
     62 sdev_zvol_create_minor(char *dsname)
     63 {
     64 	return ((*szcm)(dsname));
     65 }
     66 
     67 int
     68 sdev_zvol_name2minor(char *dsname, minor_t *minor)
     69 {
     70 	return ((*szn2m)(dsname, minor));
     71 }
     72 
     73 int
     74 devzvol_open_zfs()
     75 {
     76 	int rc;
     77 
     78 	devzvol_li = ldi_ident_from_anon();
     79 	if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
     80 	    &devzvol_lh, devzvol_li))
     81 		return (-1);
     82 	if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
     83 	    KRTLD_MODE_FIRST, &rc)) == NULL)) {
     84 		return (rc);
     85 	}
     86 	ASSERT(szcm == NULL && szn2m == NULL);
     87 	if ((szcm = (int (*)(char *))
     88 	    ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
     89 		cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
     90 		return (rc);
     91 	}
     92 	if ((szn2m = (int(*)(char *, minor_t *))
     93 	    ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
     94 		cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
     95 		return (rc);
     96 	}
     97 	return (0);
     98 }
     99 
    100 void
    101 devzvol_close_zfs()
    102 {
    103 	szcm = NULL;
    104 	szn2m = NULL;
    105 	(void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
    106 	ldi_ident_release(devzvol_li);
    107 	if (zfs_mod != NULL) {
    108 		(void) ddi_modclose(zfs_mod);
    109 		zfs_mod = NULL;
    110 	}
    111 }
    112 
    113 int
    114 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
    115 {
    116 	uint64_t cookie;
    117 	int size = 8000;
    118 	int unused;
    119 	int rc;
    120 
    121 	if (cmd != ZFS_IOC_POOL_CONFIGS)
    122 		mutex_enter(&devzvol_mtx);
    123 	if (!devzvol_isopen) {
    124 		if ((rc = devzvol_open_zfs()) == 0) {
    125 			devzvol_isopen = B_TRUE;
    126 		} else {
    127 			if (cmd != ZFS_IOC_POOL_CONFIGS)
    128 				mutex_exit(&devzvol_mtx);
    129 			return (ENXIO);
    130 		}
    131 	}
    132 	cookie = zc->zc_cookie;
    133 again:
    134 	zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
    135 	    KM_SLEEP);
    136 	zc->zc_nvlist_dst_size = size;
    137 	rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
    138 	    &unused);
    139 	if (rc == ENOMEM) {
    140 		int newsize;
    141 		newsize = zc->zc_nvlist_dst_size;
    142 		ASSERT(newsize > size);
    143 		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
    144 		size = newsize;
    145 		zc->zc_cookie = cookie;
    146 		goto again;
    147 	}
    148 	if (alloc_size == NULL)
    149 		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
    150 	else
    151 		*alloc_size = size;
    152 	if (cmd != ZFS_IOC_POOL_CONFIGS)
    153 		mutex_exit(&devzvol_mtx);
    154 	return (rc);
    155 }
    156 
    157 /* figures out if the objset exists and returns its type */
    158 int
    159 devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
    160 {
    161 	boolean_t	ispool;
    162 	zfs_cmd_t	*zc;
    163 	int rc;
    164 
    165 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
    166 	(void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
    167 
    168 	ispool = (strchr(dsname, '/') == NULL) ? B_TRUE : B_FALSE;
    169 	if (!ispool && sdev_zvol_name2minor(dsname, NULL) == 0) {
    170 		sdcmn_err13(("found cached minor node"));
    171 		if (type)
    172 			*type = DMU_OST_ZVOL;
    173 		kmem_free(zc, sizeof (zfs_cmd_t));
    174 		return (0);
    175 	}
    176 	rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
    177 	    ZFS_IOC_OBJSET_STATS, zc, NULL);
    178 	if (type && rc == 0)
    179 		*type = (ispool) ? DMU_OST_ZFS :
    180 		    zc->zc_objset_stats.dds_type;
    181 	kmem_free(zc, sizeof (zfs_cmd_t));
    182 	return (rc);
    183 }
    184 
    185 /*
    186  * returns what the zfs dataset name should be, given the /dev/zvol
    187  * path and an optional name; otherwise NULL
    188  */
    189 char *
    190 devzvol_make_dsname(const char *path, const char *name)
    191 {
    192 	char *dsname;
    193 	const char *ptr;
    194 	int dslen;
    195 
    196 	if (strcmp(path, ZVOL_DIR) == 0)
    197 		return (NULL);
    198 	if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
    199 		return (NULL);
    200 	ptr = path + strlen(ZVOL_DIR);
    201 	if (strncmp(ptr, "/dsk", 4) == 0)
    202 		ptr += strlen("/dsk");
    203 	else if (strncmp(ptr, "/rdsk", 5) == 0)
    204 		ptr += strlen("/rdsk");
    205 	else
    206 		return (NULL);
    207 	if (*ptr == '/')
    208 		ptr++;
    209 
    210 	dslen = strlen(ptr);
    211 	if (dslen)
    212 		dslen++;			/* plus null */
    213 	if (name)
    214 		dslen += strlen(name) + 1;	/* plus slash */
    215 	dsname = kmem_zalloc(dslen, KM_SLEEP);
    216 	if (*ptr) {
    217 		(void) strlcpy(dsname, ptr, dslen);
    218 		if (name)
    219 			(void) strlcat(dsname, "/", dslen);
    220 	}
    221 	if (name)
    222 		(void) strlcat(dsname, name, dslen);
    223 	return (dsname);
    224 }
    225 
    226 /*
    227  * check if the zvol's sdev_node is still valid, which means make
    228  * sure the zvol is still valid.  zvol minors aren't proactively
    229  * destroyed when the zvol is destroyed, so we use a validator to clean
    230  * these up (in other words, when such nodes are encountered during
    231  * subsequent lookup() and readdir() operations) so that only valid
    232  * nodes are returned.  The ordering between devname_lookup_func and
    233  * devzvol_validate is a little inefficient in the case of invalid
    234  * or stale nodes because devname_lookup_func calls
    235  * devzvol_create_{dir, link}, then the validator says it's invalid,
    236  * and then the node gets cleaned up.
    237  */
    238 int
    239 devzvol_validate(struct sdev_node *dv)
    240 {
    241 	dmu_objset_type_t do_type;
    242 	char *dsname;
    243 	char *nm = dv->sdev_name;
    244 	int rc;
    245 
    246 	sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
    247 	/*
    248 	 * validate only READY nodes; if someone is sitting on the
    249 	 * directory of a dataset that just got destroyed we could
    250 	 * get a zombie node which we just skip.
    251 	 */
    252 	if (dv->sdev_state != SDEV_READY) {
    253 		sdcmn_err13(("skipping '%s'", nm));
    254 		return (SDEV_VTOR_SKIP);
    255 	}
    256 
    257 	if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
    258 	    (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
    259 		return (SDEV_VTOR_VALID);
    260 	dsname = devzvol_make_dsname(dv->sdev_path, NULL);
    261 	if (dsname == NULL)
    262 		return (SDEV_VTOR_INVALID);
    263 
    264 	rc = devzvol_objset_check(dsname, &do_type);
    265 	sdcmn_err13(("  '%s' rc %d", dsname, rc));
    266 	if (rc != 0) {
    267 		kmem_free(dsname, strlen(dsname) + 1);
    268 		return (SDEV_VTOR_INVALID);
    269 	}
    270 	sdcmn_err13(("  v_type %d do_type %d",
    271 	    SDEVTOV(dv)->v_type, do_type));
    272 	if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
    273 	    (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
    274 		kmem_free(dsname, strlen(dsname) + 1);
    275 		return (SDEV_VTOR_STALE);
    276 	}
    277 	if (SDEVTOV(dv)->v_type == VLNK) {
    278 		char *ptr, *link;
    279 		long val = 0;
    280 		minor_t lminor, ominor;
    281 
    282 		rc = sdev_getlink(SDEVTOV(dv), &link);
    283 		ASSERT(rc == 0);
    284 
    285 		ptr = strrchr(link, ':') + 1;
    286 		rc = ddi_strtol(ptr, NULL, 10, &val);
    287 		kmem_free(link, strlen(link) + 1);
    288 		ASSERT(rc == 0 && val != 0);
    289 		lminor = (minor_t)val;
    290 		if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
    291 		    ominor != lminor) {
    292 			kmem_free(dsname, strlen(dsname) + 1);
    293 			return (SDEV_VTOR_STALE);
    294 		}
    295 	}
    296 	kmem_free(dsname, strlen(dsname) + 1);
    297 	return (SDEV_VTOR_VALID);
    298 }
    299 
    300 /*
    301  * creates directories as needed in response to a readdir
    302  */
    303 void
    304 devzvol_create_pool_dirs(struct vnode *dvp)
    305 {
    306 	zfs_cmd_t	*zc;
    307 	nvlist_t *nv = NULL;
    308 	nvpair_t *elem = NULL;
    309 	size_t size;
    310 	int pools = 0;
    311 	int rc;
    312 
    313 	sdcmn_err13(("devzvol_create_pool_dirs"));
    314 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
    315 	mutex_enter(&devzvol_mtx);
    316 	zc->zc_cookie = devzvol_gen;
    317 
    318 	rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
    319 	switch (rc) {
    320 		case 0:
    321 			/* new generation */
    322 			ASSERT(devzvol_gen != zc->zc_cookie);
    323 			devzvol_gen = zc->zc_cookie;
    324 			if (devzvol_zclist)
    325 				kmem_free((void *)(uintptr_t)devzvol_zclist,
    326 				    devzvol_zclist_size);
    327 			devzvol_zclist = zc->zc_nvlist_dst;
    328 			devzvol_zclist_size = size;
    329 			break;
    330 		case EEXIST:
    331 			/*
    332 			 * no change in the configuration; still need
    333 			 * to do lookups in case we did a lookup in
    334 			 * zvol/rdsk but not zvol/dsk (or vice versa)
    335 			 */
    336 			kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
    337 			    size);
    338 			break;
    339 		default:
    340 			kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
    341 			    size);
    342 			goto out;
    343 	}
    344 	rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
    345 	    devzvol_zclist_size, &nv, 0);
    346 	if (rc) {
    347 		ASSERT(rc == 0);
    348 		kmem_free((void *)(uintptr_t)devzvol_zclist,
    349 		    devzvol_zclist_size);
    350 		devzvol_gen = 0;
    351 		devzvol_zclist = NULL;
    352 		devzvol_zclist_size = 0;
    353 		goto out;
    354 	}
    355 	mutex_exit(&devzvol_mtx);
    356 	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
    357 		struct vnode *vp;
    358 		ASSERT(dvp->v_count > 0);
    359 		rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
    360 		    NULL, kcred, NULL, 0, NULL);
    361 		/* should either work, or not be visible from a zone */
    362 		ASSERT(rc == 0 || rc == ENOENT);
    363 		if (rc == 0)
    364 			VN_RELE(vp);
    365 		pools++;
    366 	}
    367 	nvlist_free(nv);
    368 	mutex_enter(&devzvol_mtx);
    369 	if (devzvol_isopen && pools == 0) {
    370 		/* clean up so zfs can be unloaded */
    371 		devzvol_close_zfs();
    372 		devzvol_isopen = B_FALSE;
    373 	}
    374 out:
    375 	mutex_exit(&devzvol_mtx);
    376 	kmem_free(zc, sizeof (zfs_cmd_t));
    377 }
    378 
    379 /*ARGSUSED3*/
    380 static int
    381 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
    382     cred_t *cred, void *whatever, char *whichever)
    383 {
    384 	timestruc_t now;
    385 	struct vattr *vap = (struct vattr *)arg;
    386 
    387 	sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
    388 	    ddv->sdev_path, nm));
    389 	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
    390 	    strlen(ZVOL_DIR)) == 0);
    391 	*vap = *sdev_getdefault_attr(VDIR);
    392 	gethrestime(&now);
    393 	vap->va_atime = now;
    394 	vap->va_mtime = now;
    395 	vap->va_ctime = now;
    396 	return (0);
    397 }
    398 
    399 /*ARGSUSED3*/
    400 static int
    401 devzvol_create_link(struct sdev_node *ddv, char *nm,
    402     void **arg, cred_t *cred, void *whatever, char *whichever)
    403 {
    404 	minor_t minor;
    405 	char *pathname = (char *)*arg;
    406 	int rc;
    407 	char *dsname;
    408 	char *x;
    409 	char str[MAXNAMELEN];
    410 	sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
    411 	    ddv->sdev_path, nm));
    412 	dsname = devzvol_make_dsname(ddv->sdev_path, nm);
    413 	rc = sdev_zvol_create_minor(dsname);
    414 	if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
    415 	    sdev_zvol_name2minor(dsname, &minor)) {
    416 		sdcmn_err13(("devzvol_create_link %d", rc));
    417 		kmem_free(dsname, strlen(dsname) + 1);
    418 		return (-1);
    419 	}
    420 	kmem_free(dsname, strlen(dsname) + 1);
    421 
    422 	/*
    423 	 * This is a valid zvol; create a symlink that points to the
    424 	 * minor which was created under /devices/pseudo/zfs@0
    425 	 */
    426 	*pathname = '\0';
    427 	for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
    428 		(void) strcat(pathname, "../");
    429 	(void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
    430 	(void) strncat(pathname, str, MAXPATHLEN);
    431 	if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
    432 	    strlen(ZVOL_FULL_RDEV_DIR)) == 0)
    433 		(void) strcat(pathname, ",raw");
    434 	return (0);
    435 }
    436 
    437 /* Clean zvol sdev_nodes that are no longer valid.  */
    438 static void
    439 devzvol_prunedir(struct sdev_node *ddv)
    440 {
    441 	struct sdev_node *dv;
    442 
    443 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
    444 
    445 	sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
    446 	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
    447 	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
    448 		rw_exit(&ddv->sdev_contents);
    449 		rw_enter(&ddv->sdev_contents, RW_WRITER);
    450 	}
    451 
    452 	dv = SDEV_FIRST_ENTRY(ddv);
    453 	while (dv) {
    454 		sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
    455 		/* skip stale nodes */
    456 		if (dv->sdev_flags & SDEV_STALE) {
    457 			sdcmn_err13(("  stale"));
    458 			dv = SDEV_NEXT_ENTRY(ddv, dv);
    459 			continue;
    460 		}
    461 
    462 		switch (devzvol_validate(dv)) {
    463 		case SDEV_VTOR_VALID:
    464 		case SDEV_VTOR_SKIP:
    465 			dv = SDEV_NEXT_ENTRY(ddv, dv);
    466 			continue;
    467 		case SDEV_VTOR_INVALID:
    468 			sdcmn_err7(("prunedir: destroy invalid "
    469 			    "node: %s\n", dv->sdev_name));
    470 			break;
    471 		}
    472 
    473 		if ((SDEVTOV(dv)->v_type == VDIR) &&
    474 		    (sdev_cleandir(dv, NULL, 0) != 0)) {
    475 			dv = SDEV_NEXT_ENTRY(ddv, dv);
    476 			continue;
    477 		}
    478 		SDEV_HOLD(dv);
    479 		/* remove the cache node */
    480 		if (sdev_cache_update(ddv, &dv, dv->sdev_name,
    481 		    SDEV_CACHE_DELETE) == 0)
    482 			dv = SDEV_FIRST_ENTRY(ddv);
    483 		else
    484 			dv = SDEV_NEXT_ENTRY(ddv, dv);
    485 	}
    486 	rw_downgrade(&ddv->sdev_contents);
    487 }
    488 
    489 /*ARGSUSED*/
    490 static int
    491 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
    492     struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
    493     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
    494 {
    495 	enum vtype expected_type = VDIR;
    496 	struct sdev_node *parent = VTOSDEV(dvp);
    497 	char *dsname;
    498 	dmu_objset_type_t do_type;
    499 	int error;
    500 
    501 	sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
    502 	*vpp = NULL;
    503 	/* execute access is required to search the directory */
    504 	if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
    505 		return (error);
    506 
    507 	rw_enter(&parent->sdev_contents, RW_READER);
    508 	if (!SDEV_IS_GLOBAL(parent)) {
    509 		rw_exit(&parent->sdev_contents);
    510 		return (prof_lookup(dvp, nm, vpp, cred));
    511 	}
    512 
    513 	dsname = devzvol_make_dsname(parent->sdev_path, nm);
    514 	rw_exit(&parent->sdev_contents);
    515 	sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
    516 	if (dsname) {
    517 		error = devzvol_objset_check(dsname, &do_type);
    518 		if (error != 0) {
    519 			error = ENOENT;
    520 			goto out;
    521 		}
    522 		if (do_type == DMU_OST_ZVOL)
    523 			expected_type = VLNK;
    524 	}
    525 	/*
    526 	 * the callbacks expect:
    527 	 *
    528 	 * parent->sdev_path		   nm
    529 	 * /dev/zvol			   {r}dsk
    530 	 * /dev/zvol/{r}dsk		   <pool name>
    531 	 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
    532 	 *
    533 	 * sdev_name is always last path component of sdev_path
    534 	 */
    535 	if (expected_type == VDIR) {
    536 		error = devname_lookup_func(parent, nm, vpp, cred,
    537 		    devzvol_create_dir, SDEV_VATTR);
    538 	} else {
    539 		error = devname_lookup_func(parent, nm, vpp, cred,
    540 		    devzvol_create_link, SDEV_VLINK);
    541 	}
    542 	sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
    543 	ASSERT(error || ((*vpp)->v_type == expected_type));
    544 out:
    545 	if (dsname)
    546 		kmem_free(dsname, strlen(dsname) + 1);
    547 	sdcmn_err13(("devzvol_lookup %d", error));
    548 	return (error);
    549 }
    550 
    551 /*
    552  * We allow create to find existing nodes
    553  *	- if the node doesn't exist - EROFS
    554  *	- creating an existing dir read-only succeeds, otherwise EISDIR
    555  *	- exclusive creates fail - EEXIST
    556  */
    557 /*ARGSUSED2*/
    558 static int
    559 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
    560     int mode, struct vnode **vpp, struct cred *cred, int flag,
    561     caller_context_t *ct, vsecattr_t *vsecp)
    562 {
    563 	int error;
    564 	struct vnode *vp;
    565 
    566 	*vpp = NULL;
    567 
    568 	error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
    569 	    NULL);
    570 	if (error == 0) {
    571 		if (excl == EXCL)
    572 			error = EEXIST;
    573 		else if (vp->v_type == VDIR && (mode & VWRITE))
    574 			error = EISDIR;
    575 		else
    576 			error = VOP_ACCESS(vp, mode, 0, cred, ct);
    577 
    578 		if (error) {
    579 			VN_RELE(vp);
    580 		} else
    581 			*vpp = vp;
    582 	} else if (error == ENOENT) {
    583 		error = EROFS;
    584 	}
    585 
    586 	return (error);
    587 }
    588 
    589 void sdev_iter_snapshots(struct vnode *dvp, char *name);
    590 
    591 void
    592 sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
    593 {
    594 	zfs_cmd_t	*zc;
    595 	int rc;
    596 
    597 	sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
    598 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
    599 	(void) strcpy(zc->zc_name, name);
    600 
    601 	while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
    602 		struct vnode *vpp;
    603 		char *ptr;
    604 
    605 		sdcmn_err13(("  name %s", zc->zc_name));
    606 		if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
    607 			goto skip;
    608 		ptr = strrchr(zc->zc_name, '/') + 1;
    609 		rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
    610 		    kcred, NULL, NULL, NULL);
    611 		if (rc == 0) {
    612 			VN_RELE(vpp);
    613 		} else if (rc == ENOENT) {
    614 			goto skip;
    615 		} else {
    616 			/* EBUSY == problem with zvols's dmu holds? */
    617 			ASSERT(0);
    618 			goto skip;
    619 		}
    620 		if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
    621 		    zc->zc_objset_stats.dds_type != DMU_OST_ZFS)
    622 			sdev_iter_snapshots(dvp, zc->zc_name);
    623 skip:
    624 		(void) strcpy(zc->zc_name, name);
    625 	}
    626 	kmem_free(zc, sizeof (zfs_cmd_t));
    627 }
    628 
    629 void
    630 sdev_iter_snapshots(struct vnode *dvp, char *name)
    631 {
    632 	sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
    633 }
    634 
    635 /*ARGSUSED4*/
    636 static int
    637 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
    638     int *eofp, caller_context_t *ct_unused, int flags_unused)
    639 {
    640 	struct sdev_node *sdvp = VTOSDEV(dvp);
    641 	char *ptr;
    642 
    643 	sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
    644 	    sdvp->sdev_name));
    645 
    646 	if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
    647 		struct vnode *vp;
    648 
    649 		rw_exit(&sdvp->sdev_contents);
    650 		(void) devname_lookup_func(sdvp, "dsk", &vp, cred,
    651 		    devzvol_create_dir, SDEV_VATTR);
    652 		VN_RELE(vp);
    653 		(void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
    654 		    devzvol_create_dir, SDEV_VATTR);
    655 		VN_RELE(vp);
    656 		rw_enter(&sdvp->sdev_contents, RW_READER);
    657 		return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
    658 	}
    659 	if (uiop->uio_offset == 0)
    660 		devzvol_prunedir(sdvp);
    661 	ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
    662 	if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
    663 		rw_exit(&sdvp->sdev_contents);
    664 		devzvol_create_pool_dirs(dvp);
    665 		rw_enter(&sdvp->sdev_contents, RW_READER);
    666 		return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
    667 	}
    668 
    669 	ptr = strchr(ptr + 1, '/') + 1;
    670 	rw_exit(&sdvp->sdev_contents);
    671 	sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
    672 	rw_enter(&sdvp->sdev_contents, RW_READER);
    673 	return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
    674 }
    675 
/*
 * vnode operation template for /dev/zvol: readdir, lookup and create
 * are implemented in this file; rename, mkdir, rmdir, remove and
 * symlink are routed to fs_nosys.  The table ends with a NULL entry.
 */
const fs_operation_def_t devzvol_vnodeops_tbl[] = {
	VOPNAME_READDIR,	{ .vop_readdir = devzvol_readdir },
	VOPNAME_LOOKUP,		{ .vop_lookup = devzvol_lookup },
	VOPNAME_CREATE,		{ .vop_create = devzvol_create },
	VOPNAME_RENAME,		{ .error = fs_nosys },
	VOPNAME_MKDIR,		{ .error = fs_nosys },
	VOPNAME_RMDIR,		{ .error = fs_nosys },
	VOPNAME_REMOVE,		{ .error = fs_nosys },
	VOPNAME_SYMLINK,	{ .error = fs_nosys },
	NULL,			NULL
};
    687