Home | History | Annotate | Download | only in dev
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * utility routines for the /dev fs
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/t_lock.h>
     33 #include <sys/systm.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/user.h>
     36 #include <sys/time.h>
     37 #include <sys/vfs.h>
     38 #include <sys/vnode.h>
     39 #include <sys/file.h>
     40 #include <sys/fcntl.h>
     41 #include <sys/flock.h>
     42 #include <sys/kmem.h>
     43 #include <sys/uio.h>
     44 #include <sys/errno.h>
     45 #include <sys/stat.h>
     46 #include <sys/cred.h>
     47 #include <sys/dirent.h>
     48 #include <sys/pathname.h>
     49 #include <sys/cmn_err.h>
     50 #include <sys/debug.h>
     51 #include <sys/mode.h>
     52 #include <sys/policy.h>
     53 #include <fs/fs_subr.h>
     54 #include <sys/mount.h>
     55 #include <sys/fs/snode.h>
     56 #include <sys/fs/dv_node.h>
     57 #include <sys/fs/sdev_impl.h>
     58 #include <sys/sunndi.h>
     59 #include <sys/sunmdi.h>
     60 #include <sys/conf.h>
     61 #include <sys/proc.h>
     62 #include <sys/user.h>
     63 #include <sys/modctl.h>
     64 
     65 #ifdef DEBUG
     66 int sdev_debug = 0x00000001;
     67 int sdev_debug_cache_flags = 0;
     68 #endif
     69 
     70 /*
     71  * globals
     72  */
     73 /* prototype memory vattrs */
     74 vattr_t sdev_vattr_dir = {
     75 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
     76 	VDIR,					/* va_type */
     77 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
     78 	SDEV_UID_DEFAULT,			/* va_uid */
     79 	SDEV_GID_DEFAULT,			/* va_gid */
     80 	0,					/* va_fsid */
     81 	0,					/* va_nodeid */
     82 	0,					/* va_nlink */
     83 	0,					/* va_size */
     84 	0,					/* va_atime */
     85 	0,					/* va_mtime */
     86 	0,					/* va_ctime */
     87 	0,					/* va_rdev */
     88 	0,					/* va_blksize */
     89 	0,					/* va_nblocks */
     90 	0					/* va_vcode */
     91 };
     92 
     93 vattr_t sdev_vattr_lnk = {
     94 	AT_TYPE|AT_MODE,			/* va_mask */
     95 	VLNK,					/* va_type */
     96 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
     97 	SDEV_UID_DEFAULT,			/* va_uid */
     98 	SDEV_GID_DEFAULT,			/* va_gid */
     99 	0,					/* va_fsid */
    100 	0,					/* va_nodeid */
    101 	0,					/* va_nlink */
    102 	0,					/* va_size */
    103 	0,					/* va_atime */
    104 	0,					/* va_mtime */
    105 	0,					/* va_ctime */
    106 	0,					/* va_rdev */
    107 	0,					/* va_blksize */
    108 	0,					/* va_nblocks */
    109 	0					/* va_vcode */
    110 };
    111 
    112 vattr_t sdev_vattr_blk = {
    113 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
    114 	VBLK,					/* va_type */
    115 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
    116 	SDEV_UID_DEFAULT,			/* va_uid */
    117 	SDEV_GID_DEFAULT,			/* va_gid */
    118 	0,					/* va_fsid */
    119 	0,					/* va_nodeid */
    120 	0,					/* va_nlink */
    121 	0,					/* va_size */
    122 	0,					/* va_atime */
    123 	0,					/* va_mtime */
    124 	0,					/* va_ctime */
    125 	0,					/* va_rdev */
    126 	0,					/* va_blksize */
    127 	0,					/* va_nblocks */
    128 	0					/* va_vcode */
    129 };
    130 
    131 vattr_t sdev_vattr_chr = {
    132 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
    133 	VCHR,					/* va_type */
    134 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
    135 	SDEV_UID_DEFAULT,			/* va_uid */
    136 	SDEV_GID_DEFAULT,			/* va_gid */
    137 	0,					/* va_fsid */
    138 	0,					/* va_nodeid */
    139 	0,					/* va_nlink */
    140 	0,					/* va_size */
    141 	0,					/* va_atime */
    142 	0,					/* va_mtime */
    143 	0,					/* va_ctime */
    144 	0,					/* va_rdev */
    145 	0,					/* va_blksize */
    146 	0,					/* va_nblocks */
    147 	0					/* va_vcode */
    148 };
    149 
    150 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
    151 int		devtype;		/* fstype */
    152 
    153 /* static */
    154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
    155 static void sdev_set_no_negcache(struct sdev_node *);
    156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
    157 static void sdev_free_vtab(fs_operation_def_t *);
    158 
    159 static void
    160 sdev_prof_free(struct sdev_node *dv)
    161 {
    162 	ASSERT(!SDEV_IS_GLOBAL(dv));
    163 	if (dv->sdev_prof.dev_name)
    164 		nvlist_free(dv->sdev_prof.dev_name);
    165 	if (dv->sdev_prof.dev_map)
    166 		nvlist_free(dv->sdev_prof.dev_map);
    167 	if (dv->sdev_prof.dev_symlink)
    168 		nvlist_free(dv->sdev_prof.dev_symlink);
    169 	if (dv->sdev_prof.dev_glob_incdir)
    170 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
    171 	if (dv->sdev_prof.dev_glob_excdir)
    172 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
    173 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    174 }
    175 
    176 /* sdev_node cache constructor */
    177 /*ARGSUSED1*/
    178 static int
    179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
    180 {
    181 	struct sdev_node *dv = (struct sdev_node *)buf;
    182 	struct vnode *vp;
    183 
    184 	bzero(buf, sizeof (struct sdev_node));
    185 	vp = dv->sdev_vnode = vn_alloc(flag);
    186 	if (vp == NULL) {
    187 		return (-1);
    188 	}
    189 	vp->v_data = dv;
    190 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
    191 	return (0);
    192 }
    193 
    194 /* sdev_node cache destructor */
    195 /*ARGSUSED1*/
    196 static void
    197 i_sdev_node_dtor(void *buf, void *arg)
    198 {
    199 	struct sdev_node *dv = (struct sdev_node *)buf;
    200 	struct vnode *vp = SDEVTOV(dv);
    201 
    202 	rw_destroy(&dv->sdev_contents);
    203 	vn_free(vp);
    204 }
    205 
    206 /* initialize sdev_node cache */
    207 void
    208 sdev_node_cache_init()
    209 {
    210 	int flags = 0;
    211 
    212 #ifdef	DEBUG
    213 	flags = sdev_debug_cache_flags;
    214 	if (flags)
    215 		sdcmn_err(("cache debug flags 0x%x\n", flags));
    216 #endif	/* DEBUG */
    217 
    218 	ASSERT(sdev_node_cache == NULL);
    219 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
    220 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
    221 	    NULL, NULL, NULL, flags);
    222 }
    223 
    224 /* destroy sdev_node cache */
    225 void
    226 sdev_node_cache_fini()
    227 {
    228 	ASSERT(sdev_node_cache != NULL);
    229 	kmem_cache_destroy(sdev_node_cache);
    230 	sdev_node_cache = NULL;
    231 }
    232 
    233 /*
    234  * Compare two nodes lexographically to balance avl tree
    235  */
    236 static int
    237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
    238 {
    239 	int rv;
    240 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
    241 		return (0);
    242 	return ((rv < 0) ? -1 : 1);
    243 }
    244 
    245 void
    246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
    247 {
    248 	ASSERT(dv);
    249 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
    250 	dv->sdev_state = state;
    251 }
    252 
    253 static void
    254 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
    255 {
    256 	timestruc_t	now;
    257 	struct vattr	*attrp;
    258 	uint_t		mask;
    259 
    260 	ASSERT(dv->sdev_attr);
    261 	ASSERT(vap);
    262 
    263 	attrp = dv->sdev_attr;
    264 	mask = vap->va_mask;
    265 	if (mask & AT_TYPE)
    266 		attrp->va_type = vap->va_type;
    267 	if (mask & AT_MODE)
    268 		attrp->va_mode = vap->va_mode;
    269 	if (mask & AT_UID)
    270 		attrp->va_uid = vap->va_uid;
    271 	if (mask & AT_GID)
    272 		attrp->va_gid = vap->va_gid;
    273 	if (mask & AT_RDEV)
    274 		attrp->va_rdev = vap->va_rdev;
    275 
    276 	gethrestime(&now);
    277 	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
    278 	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
    279 	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
    280 }
    281 
    282 static void
    283 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
    284 {
    285 	ASSERT(dv->sdev_attr == NULL);
    286 	ASSERT(vap->va_mask & AT_TYPE);
    287 	ASSERT(vap->va_mask & AT_MODE);
    288 
    289 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
    290 	sdev_attr_update(dv, vap);
    291 }
    292 
    293 /* alloc and initialize a sdev_node */
    294 int
    295 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    296     vattr_t *vap)
    297 {
    298 	struct sdev_node *dv = NULL;
    299 	struct vnode *vp;
    300 	size_t nmlen, len;
    301 	devname_handle_t  *dhl;
    302 
    303 	nmlen = strlen(nm) + 1;
    304 	if (nmlen > MAXNAMELEN) {
    305 		sdcmn_err9(("sdev_nodeinit: node name %s"
    306 		    " too long\n", nm));
    307 		*newdv = NULL;
    308 		return (ENAMETOOLONG);
    309 	}
    310 
    311 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
    312 
    313 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
    314 	bcopy(nm, dv->sdev_name, nmlen);
    315 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
    316 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
    317 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
    318 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
    319 	/* overwritten for VLNK nodes */
    320 	dv->sdev_symlink = NULL;
    321 
    322 	vp = SDEVTOV(dv);
    323 	vn_reinit(vp);
    324 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
    325 	if (vap)
    326 		vp->v_type = vap->va_type;
    327 
    328 	/*
    329 	 * initialized to the parent's vnodeops.
    330 	 * maybe overwriten for a VDIR
    331 	 */
    332 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
    333 	vn_exists(vp);
    334 
    335 	dv->sdev_dotdot = NULL;
    336 	dv->sdev_attrvp = NULL;
    337 	if (vap) {
    338 		sdev_attr_alloc(dv, vap);
    339 	} else {
    340 		dv->sdev_attr = NULL;
    341 	}
    342 
    343 	dv->sdev_ino = sdev_mkino(dv);
    344 	dv->sdev_nlink = 0;		/* updated on insert */
    345 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
    346 	dv->sdev_flags |= SDEV_BUILD;
    347 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
    348 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
    349 	if (SDEV_IS_GLOBAL(ddv)) {
    350 		dv->sdev_flags |= SDEV_GLOBAL;
    351 		dhl = &(dv->sdev_handle);
    352 		dhl->dh_data = dv;
    353 		dhl->dh_args = NULL;
    354 		sdev_set_no_negcache(dv);
    355 		dv->sdev_gdir_gen = 0;
    356 	} else {
    357 		dv->sdev_flags &= ~SDEV_GLOBAL;
    358 		dv->sdev_origin = NULL; /* set later */
    359 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    360 		dv->sdev_ldir_gen = 0;
    361 		dv->sdev_devtree_gen = 0;
    362 	}
    363 
    364 	rw_enter(&dv->sdev_contents, RW_WRITER);
    365 	sdev_set_nodestate(dv, SDEV_INIT);
    366 	rw_exit(&dv->sdev_contents);
    367 	*newdv = dv;
    368 
    369 	return (0);
    370 }
    371 
    372 /*
    373  * transition a sdev_node into SDEV_READY state
    374  */
    375 int
    376 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    377     void *args, struct cred *cred)
    378 {
    379 	int error = 0;
    380 	struct vnode *vp = SDEVTOV(dv);
    381 	vtype_t type;
    382 
    383 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
    384 
    385 	type = vap->va_type;
    386 	vp->v_type = type;
    387 	vp->v_rdev = vap->va_rdev;
    388 	rw_enter(&dv->sdev_contents, RW_WRITER);
    389 	if (type == VDIR) {
    390 		dv->sdev_nlink = 2;
    391 		dv->sdev_flags &= ~SDEV_PERSIST;
    392 		dv->sdev_flags &= ~SDEV_DYNAMIC;
    393 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
    394 		ASSERT(dv->sdev_dotdot);
    395 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
    396 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
    397 		avl_create(&dv->sdev_entries,
    398 		    (int (*)(const void *, const void *))sdev_compare_nodes,
    399 		    sizeof (struct sdev_node),
    400 		    offsetof(struct sdev_node, sdev_avllink));
    401 	} else if (type == VLNK) {
    402 		ASSERT(args);
    403 		dv->sdev_nlink = 1;
    404 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
    405 	} else {
    406 		dv->sdev_nlink = 1;
    407 	}
    408 
    409 	if (!(SDEV_IS_GLOBAL(dv))) {
    410 		dv->sdev_origin = (struct sdev_node *)args;
    411 		dv->sdev_flags &= ~SDEV_PERSIST;
    412 	}
    413 
    414 	/*
    415 	 * shadow node is created here OR
    416 	 * if failed (indicated by dv->sdev_attrvp == NULL),
    417 	 * created later in sdev_setattr
    418 	 */
    419 	if (avp) {
    420 		dv->sdev_attrvp = avp;
    421 	} else {
    422 		if (dv->sdev_attr == NULL) {
    423 			sdev_attr_alloc(dv, vap);
    424 		} else {
    425 			sdev_attr_update(dv, vap);
    426 		}
    427 
    428 		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
    429 			error = sdev_shadow_node(dv, cred);
    430 	}
    431 
    432 	if (error == 0) {
    433 		/* transition to READY state */
    434 		sdev_set_nodestate(dv, SDEV_READY);
    435 		sdev_nc_node_exists(dv);
    436 	} else {
    437 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
    438 	}
    439 	rw_exit(&dv->sdev_contents);
    440 	return (error);
    441 }
    442 
    443 /*
    444  * setting ZOMBIE state
    445  */
    446 static int
    447 sdev_nodezombied(struct sdev_node *dv)
    448 {
    449 	rw_enter(&dv->sdev_contents, RW_WRITER);
    450 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
    451 	rw_exit(&dv->sdev_contents);
    452 	return (0);
    453 }
    454 
    455 /*
    456  * Build the VROOT sdev_node.
    457  */
    458 /*ARGSUSED*/
    459 struct sdev_node *
    460 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    461     struct vnode *avp, struct cred *cred)
    462 {
    463 	struct sdev_node *dv;
    464 	struct vnode *vp;
    465 	char devdir[] = "/dev";
    466 
    467 	ASSERT(sdev_node_cache != NULL);
    468 	ASSERT(avp);
    469 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
    470 	vp = SDEVTOV(dv);
    471 	vn_reinit(vp);
    472 	vp->v_flag |= VROOT;
    473 	vp->v_vfsp = vfsp;
    474 	vp->v_type = VDIR;
    475 	vp->v_rdev = devdev;
    476 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
    477 	vn_exists(vp);
    478 
    479 	if (vfsp->vfs_mntpt)
    480 		dv->sdev_name = i_ddi_strdup(
    481 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
    482 	else
    483 		/* vfs_mountdev1 set mount point later */
    484 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
    485 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
    486 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
    487 	dv->sdev_ino = SDEV_ROOTINO;
    488 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
    489 	dv->sdev_dotdot = dv;		/* .. == self */
    490 	dv->sdev_attrvp = avp;
    491 	dv->sdev_attr = NULL;
    492 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
    493 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
    494 	if (strcmp(dv->sdev_name, "/dev") == 0) {
    495 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
    496 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
    497 		dv->sdev_gdir_gen = 0;
    498 	} else {
    499 		dv->sdev_flags = SDEV_BUILD;
    500 		dv->sdev_flags &= ~SDEV_PERSIST;
    501 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    502 		dv->sdev_ldir_gen = 0;
    503 		dv->sdev_devtree_gen = 0;
    504 	}
    505 
    506 	avl_create(&dv->sdev_entries,
    507 	    (int (*)(const void *, const void *))sdev_compare_nodes,
    508 	    sizeof (struct sdev_node),
    509 	    offsetof(struct sdev_node, sdev_avllink));
    510 
    511 	rw_enter(&dv->sdev_contents, RW_WRITER);
    512 	sdev_set_nodestate(dv, SDEV_READY);
    513 	rw_exit(&dv->sdev_contents);
    514 	sdev_nc_node_exists(dv);
    515 	return (dv);
    516 }
    517 
    518 /* directory dependent vop table */
    519 struct sdev_vop_table {
    520 	char *vt_name;				/* subdirectory name */
    521 	const fs_operation_def_t *vt_service;	/* vnodeops table */
    522 	struct vnodeops *vt_vops;		/* constructed vop */
    523 	struct vnodeops **vt_global_vops;	/* global container for vop */
    524 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
    525 	int vt_flags;
    526 };
    527 
    528 /*
    529  * A nice improvement would be to provide a plug-in mechanism
    530  * for this table instead of a const table.
    531  */
    532 static struct sdev_vop_table vtab[] =
    533 {
    534 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
    535 	SDEV_DYNAMIC | SDEV_VTOR },
    536 
    537 	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
    538 	SDEV_DYNAMIC | SDEV_VTOR },
    539 
    540 	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
    541 	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
    542 
    543 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
    544 
    545 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
    546 	SDEV_DYNAMIC | SDEV_VTOR },
    547 
    548 	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
    549 	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
    550 
    551 	{ NULL, NULL, NULL, NULL, NULL, 0}
    552 };
    553 
    554 struct sdev_vop_table *
    555 sdev_match(struct sdev_node *dv)
    556 {
    557 	int vlen;
    558 	int i;
    559 
    560 	for (i = 0; vtab[i].vt_name; i++) {
    561 		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
    562 			return (&vtab[i]);
    563 		if (vtab[i].vt_flags & SDEV_SUBDIR) {
    564 			char *ptr;
    565 
    566 			ASSERT(strlen(dv->sdev_path) > 5);
    567 			ptr = dv->sdev_path + 5;
    568 			vlen = strlen(vtab[i].vt_name);
    569 			if ((strncmp(vtab[i].vt_name, ptr,
    570 			    vlen - 1) == 0) && ptr[vlen] == '/')
    571 				return (&vtab[i]);
    572 		}
    573 
    574 	}
    575 	return (NULL);
    576 }
    577 
    578 /*
    579  *  sets a directory's vnodeops if the directory is in the vtab;
    580  */
    581 static struct vnodeops *
    582 sdev_get_vop(struct sdev_node *dv)
    583 {
    584 	struct sdev_vop_table *vtp;
    585 	char *path;
    586 
    587 	path = dv->sdev_path;
    588 	ASSERT(path);
    589 
    590 	/* gets the relative path to /dev/ */
    591 	path += 5;
    592 
    593 	/* gets the vtab entry it matches */
    594 	if ((vtp = sdev_match(dv)) != NULL) {
    595 		dv->sdev_flags |= vtp->vt_flags;
    596 
    597 		if (vtp->vt_vops) {
    598 			if (vtp->vt_global_vops)
    599 				*(vtp->vt_global_vops) = vtp->vt_vops;
    600 			return (vtp->vt_vops);
    601 		}
    602 
    603 		if (vtp->vt_service) {
    604 			fs_operation_def_t *templ;
    605 			templ = sdev_merge_vtab(vtp->vt_service);
    606 			if (vn_make_ops(vtp->vt_name,
    607 			    (const fs_operation_def_t *)templ,
    608 			    &vtp->vt_vops) != 0) {
    609 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
    610 				    vtp->vt_name);
    611 				/*NOTREACHED*/
    612 			}
    613 			if (vtp->vt_global_vops) {
    614 				*(vtp->vt_global_vops) = vtp->vt_vops;
    615 			}
    616 			sdev_free_vtab(templ);
    617 			return (vtp->vt_vops);
    618 		}
    619 		return (sdev_vnodeops);
    620 	}
    621 
    622 	/* child inherits the persistence of the parent */
    623 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
    624 		dv->sdev_flags |= SDEV_PERSIST;
    625 
    626 	return (sdev_vnodeops);
    627 }
    628 
    629 static void
    630 sdev_set_no_negcache(struct sdev_node *dv)
    631 {
    632 	int i;
    633 	char *path;
    634 
    635 	ASSERT(dv->sdev_path);
    636 	path = dv->sdev_path + strlen("/dev/");
    637 
    638 	for (i = 0; vtab[i].vt_name; i++) {
    639 		if (strcmp(vtab[i].vt_name, path) == 0) {
    640 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
    641 				dv->sdev_flags |= SDEV_NO_NCACHE;
    642 			break;
    643 		}
    644 	}
    645 }
    646 
    647 void *
    648 sdev_get_vtor(struct sdev_node *dv)
    649 {
    650 	struct sdev_vop_table *vtp;
    651 
    652 	vtp = sdev_match(dv);
    653 	if (vtp)
    654 		return ((void *)vtp->vt_vtor);
    655 	else
    656 		return (NULL);
    657 }
    658 
    659 /*
    660  * Build the base root inode
    661  */
    662 ino_t
    663 sdev_mkino(struct sdev_node *dv)
    664 {
    665 	ino_t	ino;
    666 
    667 	/*
    668 	 * for now, follow the lead of tmpfs here
    669 	 * need to someday understand the requirements here
    670 	 */
    671 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
    672 	ino += SDEV_ROOTINO + 1;
    673 
    674 	return (ino);
    675 }
    676 
    677 int
    678 sdev_getlink(struct vnode *linkvp, char **link)
    679 {
    680 	int err;
    681 	char *buf;
    682 	struct uio uio = {0};
    683 	struct iovec iov = {0};
    684 
    685 	if (linkvp == NULL)
    686 		return (ENOENT);
    687 	ASSERT(linkvp->v_type == VLNK);
    688 
    689 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
    690 	iov.iov_base = buf;
    691 	iov.iov_len = MAXPATHLEN;
    692 	uio.uio_iov = &iov;
    693 	uio.uio_iovcnt = 1;
    694 	uio.uio_resid = MAXPATHLEN;
    695 	uio.uio_segflg = UIO_SYSSPACE;
    696 	uio.uio_llimit = MAXOFFSET_T;
    697 
    698 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
    699 	if (err) {
    700 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
    701 		kmem_free(buf, MAXPATHLEN);
    702 		return (ENOENT);
    703 	}
    704 
    705 	/* mission complete */
    706 	*link = i_ddi_strdup(buf, KM_SLEEP);
    707 	kmem_free(buf, MAXPATHLEN);
    708 	return (0);
    709 }
    710 
    711 /*
    712  * A convenient wrapper to get the devfs node vnode for a device
    713  * minor functionality: readlink() of a /dev symlink
    714  * Place the link into dv->sdev_symlink
    715  */
    716 static int
    717 sdev_follow_link(struct sdev_node *dv)
    718 {
    719 	int err;
    720 	struct vnode *linkvp;
    721 	char *link = NULL;
    722 
    723 	linkvp = SDEVTOV(dv);
    724 	if (linkvp == NULL)
    725 		return (ENOENT);
    726 	ASSERT(linkvp->v_type == VLNK);
    727 	err = sdev_getlink(linkvp, &link);
    728 	if (err) {
    729 		(void) sdev_nodezombied(dv);
    730 		dv->sdev_symlink = NULL;
    731 		return (ENOENT);
    732 	}
    733 
    734 	ASSERT(link != NULL);
    735 	dv->sdev_symlink = link;
    736 	return (0);
    737 }
    738 
    739 static int
    740 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
    741 {
    742 	vtype_t otype = SDEVTOV(dv)->v_type;
    743 
    744 	/*
    745 	 * existing sdev_node has a different type.
    746 	 */
    747 	if (otype != nvap->va_type) {
    748 		sdcmn_err9(("sdev_node_check: existing node "
    749 		    "  %s type %d does not match new node type %d\n",
    750 		    dv->sdev_name, otype, nvap->va_type));
    751 		return (EEXIST);
    752 	}
    753 
    754 	/*
    755 	 * For a symlink, the target should be the same.
    756 	 */
    757 	if (otype == VLNK) {
    758 		ASSERT(nargs != NULL);
    759 		ASSERT(dv->sdev_symlink != NULL);
    760 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
    761 			sdcmn_err9(("sdev_node_check: existing node "
    762 			    " %s has different symlink %s as new node "
    763 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
    764 			    (char *)nargs));
    765 			return (EEXIST);
    766 		}
    767 	}
    768 
    769 	return (0);
    770 }
    771 
    772 /*
    773  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
    774  *
    775  * arguments:
    776  *	- ddv (parent)
    777  *	- nm (child name)
    778  *	- newdv (sdev_node for nm is returned here)
    779  *	- vap (vattr for the node to be created, va_type should be set.
    780  *	- avp (attribute vnode)
    781  *	  the defaults should be used if unknown)
    782  *	- cred
    783  *	- args
    784  *	    . tnm (for VLNK)
    785  *	    . global sdev_node (for !SDEV_GLOBAL)
    786  * 	- state: SDEV_INIT, SDEV_READY
    787  *
    788  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
    789  *
    790  * NOTE:  directory contents writers lock needs to be held before
    791  *	  calling this routine.
    792  */
    793 int
    794 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    795     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    796     sdev_node_state_t state)
    797 {
    798 	int error = 0;
    799 	sdev_node_state_t node_state;
    800 	struct sdev_node *dv = NULL;
    801 
    802 	ASSERT(state != SDEV_ZOMBIE);
    803 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
    804 
    805 	if (*newdv) {
    806 		dv = *newdv;
    807 	} else {
    808 		/* allocate and initialize a sdev_node */
    809 		if (ddv->sdev_state == SDEV_ZOMBIE) {
    810 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
    811 			    ddv->sdev_path));
    812 			return (ENOENT);
    813 		}
    814 
    815 		error = sdev_nodeinit(ddv, nm, &dv, vap);
    816 		if (error != 0) {
    817 			sdcmn_err9(("sdev_mknode: error %d,"
    818 			    " name %s can not be initialized\n",
    819 			    error, nm));
    820 			return (error);
    821 		}
    822 		ASSERT(dv);
    823 
    824 		/* insert into the directory cache */
    825 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
    826 		if (error) {
    827 			sdcmn_err9(("sdev_mknode: node %s can not"
    828 			    " be added into directory cache\n", nm));
    829 			return (ENOENT);
    830 		}
    831 	}
    832 
    833 	ASSERT(dv);
    834 	node_state = dv->sdev_state;
    835 	ASSERT(node_state != SDEV_ZOMBIE);
    836 
    837 	if (state == SDEV_READY) {
    838 		switch (node_state) {
    839 		case SDEV_INIT:
    840 			error = sdev_nodeready(dv, vap, avp, args, cred);
    841 			if (error) {
    842 				sdcmn_err9(("sdev_mknode: node %s can NOT"
    843 				    " be transitioned into READY state, "
    844 				    "error %d\n", nm, error));
    845 			}
    846 			break;
    847 		case SDEV_READY:
    848 			/*
    849 			 * Do some sanity checking to make sure
    850 			 * the existing sdev_node is what has been
    851 			 * asked for.
    852 			 */
    853 			error = sdev_node_check(dv, vap, args);
    854 			break;
    855 		default:
    856 			break;
    857 		}
    858 	}
    859 
    860 	if (!error) {
    861 		*newdv = dv;
    862 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
    863 	} else {
    864 		SDEV_SIMPLE_RELE(dv);
    865 		*newdv = NULL;
    866 	}
    867 
    868 	return (error);
    869 }
    870 
    871 /*
    872  * convenient wrapper to change vp's ATIME, CTIME and MTIME
    873  */
    874 void
    875 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
    876 {
    877 	struct vattr attr;
    878 	timestruc_t now;
    879 	int err;
    880 
    881 	ASSERT(vp);
    882 	gethrestime(&now);
    883 	if (mask & AT_CTIME)
    884 		attr.va_ctime = now;
    885 	if (mask & AT_MTIME)
    886 		attr.va_mtime = now;
    887 	if (mask & AT_ATIME)
    888 		attr.va_atime = now;
    889 
    890 	attr.va_mask = (mask & AT_TIMES);
    891 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
    892 	if (err && (err != EROFS)) {
    893 		sdcmn_err(("update timestamps error %d\n", err));
    894 	}
    895 }
    896 
    897 /*
    898  * the backing store vnode is released here
    899  */
    900 /*ARGSUSED1*/
    901 void
    902 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
    903 {
    904 	/* no references */
    905 	ASSERT(dv->sdev_nlink == 0);
    906 
    907 	if (dv->sdev_attrvp != NULLVP) {
    908 		VN_RELE(dv->sdev_attrvp);
    909 		/*
    910 		 * reset the attrvp so that no more
    911 		 * references can be made on this already
    912 		 * vn_rele() vnode
    913 		 */
    914 		dv->sdev_attrvp = NULLVP;
    915 	}
    916 
    917 	if (dv->sdev_attr != NULL) {
    918 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
    919 		dv->sdev_attr = NULL;
    920 	}
    921 
    922 	if (dv->sdev_name != NULL) {
    923 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
    924 		dv->sdev_name = NULL;
    925 	}
    926 
    927 	if (dv->sdev_symlink != NULL) {
    928 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
    929 		dv->sdev_symlink = NULL;
    930 	}
    931 
    932 	if (dv->sdev_path) {
    933 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
    934 		dv->sdev_path = NULL;
    935 	}
    936 
    937 	if (!SDEV_IS_GLOBAL(dv))
    938 		sdev_prof_free(dv);
    939 
    940 	if (SDEVTOV(dv)->v_type == VDIR) {
    941 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
    942 		avl_destroy(&dv->sdev_entries);
    943 	}
    944 
    945 	mutex_destroy(&dv->sdev_lookup_lock);
    946 	cv_destroy(&dv->sdev_lookup_cv);
    947 
    948 	/* return node to initial state as per constructor */
    949 	(void) memset((void *)&dv->sdev_instance_data, 0,
    950 	    sizeof (dv->sdev_instance_data));
    951 	vn_invalid(SDEVTOV(dv));
    952 	kmem_cache_free(sdev_node_cache, dv);
    953 }
    954 
    955 /*
    956  * DIRECTORY CACHE lookup
    957  */
    958 struct sdev_node *
    959 sdev_findbyname(struct sdev_node *ddv, char *nm)
    960 {
    961 	struct sdev_node *dv;
    962 	struct sdev_node dvtmp;
    963 	avl_index_t	where;
    964 
    965 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
    966 
    967 	dvtmp.sdev_name = nm;
    968 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
    969 	if (dv) {
    970 		ASSERT(dv->sdev_dotdot == ddv);
    971 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
    972 		SDEV_HOLD(dv);
    973 		return (dv);
    974 	}
    975 	return (NULL);
    976 }
    977 
    978 /*
    979  * Inserts a new sdev_node in a parent directory
    980  */
    981 void
    982 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
    983 {
    984 	avl_index_t where;
    985 
    986 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
    987 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
    988 	ASSERT(ddv->sdev_nlink >= 2);
    989 	ASSERT(dv->sdev_nlink == 0);
    990 
    991 	dv->sdev_dotdot = ddv;
    992 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
    993 	avl_insert(&ddv->sdev_entries, dv, where);
    994 	ddv->sdev_nlink++;
    995 }
    996 
    997 /*
    998  * The following check is needed because while sdev_nodes are linked
    999  * in SDEV_INIT state, they have their link counts incremented only
   1000  * in SDEV_READY state.
   1001  */
   1002 static void
   1003 decr_link(struct sdev_node *dv)
   1004 {
   1005 	if (dv->sdev_state != SDEV_INIT)
   1006 		dv->sdev_nlink--;
   1007 	else
   1008 		ASSERT(dv->sdev_nlink == 0);
   1009 }
   1010 
/*
 * Delete an existing dv from the directory cache of ddv.
 *
 * The caller must hold ddv->sdev_contents as writer.  One reference on
 * dv's vnode is dropped by this function on both paths (v_count is
 * decremented under v_lock).
 *
 * If the node is still held by others (v_count > 1), it is not unlinked:
 *     a node in SDEV_READY state is put into ZOMBIE state and EBUSY is
 *     returned.  Once the reference count reaches "0", the node is
 *     unlinked and destroyed in sdev_inactive().
 *
 * Otherwise the node is removed from the parent's AVL tree, the link
 *     counts of parent and node are adjusted, the node is destroyed via
 *     sdev_nodedestroy() and 0 is returned.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock covers both the v_count test and its decrement */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		/* state change is made under the node's own contents lock */
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_dirdelete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* drop one reference; the node stays in the directory */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	/* this was the last reference (asserted above) */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}
   1060 
   1061 /*
   1062  * check if the source is in the path of the target
   1063  *
   1064  * source and target are different
   1065  */
   1066 /*ARGSUSED2*/
   1067 static int
   1068 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
   1069 {
   1070 	int error = 0;
   1071 	struct sdev_node *dotdot, *dir;
   1072 
   1073 	dotdot = tdv->sdev_dotdot;
   1074 	ASSERT(dotdot);
   1075 
   1076 	/* fs root */
   1077 	if (dotdot == tdv) {
   1078 		return (0);
   1079 	}
   1080 
   1081 	for (;;) {
   1082 		/*
   1083 		 * avoid error cases like
   1084 		 *	mv a a/b
   1085 		 *	mv a a/b/c
   1086 		 *	etc.
   1087 		 */
   1088 		if (dotdot == sdv) {
   1089 			error = EINVAL;
   1090 			break;
   1091 		}
   1092 
   1093 		dir = dotdot;
   1094 		dotdot = dir->sdev_dotdot;
   1095 
   1096 		/* done checking because root is reached */
   1097 		if (dir == dotdot) {
   1098 			break;
   1099 		}
   1100 	}
   1101 	return (error);
   1102 }
   1103 
/*
 * Rename support: create a node named `nnm' in directory `nddv' from the
 * attributes of the source node `odv' (which lives in `oddv').
 *
 *	oddv	directory holding the source node
 *	odv	source node
 *	nddv	destination directory
 *	ndvp	in: existing destination node, or pointer to NULL if none;
 *		it is reset to NULL once an existing destination has been
 *		deleted and then receives the node made by sdev_mknode()
 *	nnm	name of the destination entry
 *	cred	credentials used for the attribute and backing store calls
 *
 * The contents locks of both directories are taken as writer here (only
 * once when they are the same directory) and dropped before returning.
 *
 * Returns 0 on success, or one of: the VOP_GETATTR error, ENOENT (source
 * no longer SDEV_READY, or its symlink cannot be followed), EINVAL
 * (directory renamed into itself or into its own subtree), EISDIR,
 * ENOTDIR, EBUSY, EEXIST, or an error from VOP_RMDIR, VOP_REMOVE,
 * sdev_mknode() or the recursive rename of directory contents.
 *
 * NOTE(review): this function does not remove the source node `odv';
 * presumably the caller does that - confirm against the caller.
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	/* source attributes are fetched before any directory lock is taken */
	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	/* lock order: source directory first, then destination directory */
	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* a directory cannot be moved into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			/* a non-directory cannot replace a directory */
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			/* refuse to replace a directory that is mounted on */
			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			/*
			 * Drop the old destination from the directory cache,
			 * then remove it from the backing store as well.
			 * The result of sdev_dirdelete() is ignored here.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			/* a directory cannot replace a non-directory */
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			/* remember persistence before the node goes away */
			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache
			 * note, in case EBUSY is returned, the ZOMBIE
			 * node is taken care in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		/* private copy of the link target, freed after sdev_mknode */
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		/*
		 * Each child of the source directory is renamed, under its
		 * own name, into the freshly made destination directory.
		 * NOTE(review): a failure here leaves via err_out, which
		 * does not SDEV_RELE(*ndvp) as the success path does -
		 * confirm whether the caller accounts for that hold.
		 */
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	/*
	 * Timestamps of the new node: through the attribute vnode when it
	 * has one, otherwise directly in the in-core attributes.
	 */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	/* same for the destination directory, which was modified */
	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	/* locks are dropped in the reverse order of acquisition */
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	/* common exit: drop the directory locks and report `error' */
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}
   1298 
   1299 /*
   1300  * Merge sdev_node specific information into an attribute structure.
   1301  *
   1302  * note: sdev_node is not locked here
   1303  */
   1304 void
   1305 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
   1306 {
   1307 	struct vnode *vp = SDEVTOV(dv);
   1308 
   1309 	vap->va_nlink = dv->sdev_nlink;
   1310 	vap->va_nodeid = dv->sdev_ino;
   1311 	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
   1312 	vap->va_type = vp->v_type;
   1313 
   1314 	if (vp->v_type == VDIR) {
   1315 		vap->va_rdev = 0;
   1316 		vap->va_fsid = vp->v_rdev;
   1317 	} else if (vp->v_type == VLNK) {
   1318 		vap->va_rdev = 0;
   1319 		vap->va_mode  &= ~S_IFMT;
   1320 		vap->va_mode |= S_IFLNK;
   1321 	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
   1322 		vap->va_rdev = vp->v_rdev;
   1323 		vap->va_mode &= ~S_IFMT;
   1324 		if (vap->va_type == VCHR)
   1325 			vap->va_mode |= S_IFCHR;
   1326 		else
   1327 			vap->va_mode |= S_IFBLK;
   1328 	} else {
   1329 		vap->va_rdev = 0;
   1330 	}
   1331 }
   1332 
   1333 struct vattr *
   1334 sdev_getdefault_attr(enum vtype type)
   1335 {
   1336 	if (type == VDIR)
   1337 		return (&sdev_vattr_dir);
   1338 	else if (type == VCHR)
   1339 		return (&sdev_vattr_chr);
   1340 	else if (type == VBLK)
   1341 		return (&sdev_vattr_blk);
   1342 	else if (type == VLNK)
   1343 		return (&sdev_vattr_lnk);
   1344 	else
   1345 		return (NULL);
   1346 }
   1347 int
   1348 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
   1349 {
   1350 	int rv = 0;
   1351 	struct vnode *vp = SDEVTOV(dv);
   1352 
   1353 	switch (vp->v_type) {
   1354 	case VCHR:
   1355 	case VBLK:
   1356 		/*
   1357 		 * If vnode is a device, return special vnode instead
   1358 		 * (though it knows all about -us- via sp->s_realvp)
   1359 		 */
   1360 		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
   1361 		VN_RELE(vp);
   1362 		if (*vpp == NULLVP)
   1363 			rv = ENOSYS;
   1364 		break;
   1365 	default:	/* most types are returned as is */
   1366 		*vpp = vp;
   1367 		break;
   1368 	}
   1369 	return (rv);
   1370 }
   1371 
   1372 /*
   1373  * junction between devname and root file system, e.g. ufs
   1374  */
   1375 int
   1376 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
   1377 {
   1378 	struct vnode *rdvp = ddv->sdev_attrvp;
   1379 	int rval = 0;
   1380 
   1381 	ASSERT(rdvp);
   1382 
   1383 	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
   1384 	    NULL);
   1385 	return (rval);
   1386 }
   1387 
   1388 static int
   1389 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
   1390 {
   1391 	struct sdev_node *dv = NULL;
   1392 	char	*nm;
   1393 	struct vnode *dirvp;
   1394 	int	error;
   1395 	vnode_t	*vp;
   1396 	int eof;
   1397 	struct iovec iov;
   1398 	struct uio uio;
   1399 	struct dirent64 *dp;
   1400 	dirent64_t *dbuf;
   1401 	size_t dbuflen;
   1402 	struct vattr vattr;
   1403 	char *link = NULL;
   1404 
   1405 	if (ddv->sdev_attrvp == NULL)
   1406 		return (0);
   1407 	if (!(ddv->sdev_flags & SDEV_BUILD))
   1408 		return (0);
   1409 
   1410 	dirvp = ddv->sdev_attrvp;
   1411 	VN_HOLD(dirvp);
   1412 	dbuf = kmem_zalloc(dlen, KM_SLEEP);
   1413 
   1414 	uio.uio_iov = &iov;
   1415 	uio.uio_iovcnt = 1;
   1416 	uio.uio_segflg = UIO_SYSSPACE;
   1417 	uio.uio_fmode = 0;
   1418 	uio.uio_extflg = UIO_COPY_CACHED;
   1419 	uio.uio_loffset = 0;
   1420 	uio.uio_llimit = MAXOFFSET_T;
   1421 
   1422 	eof = 0;
   1423 	error = 0;
   1424 	while (!error && !eof) {
   1425 		uio.uio_resid = dlen;
   1426 		iov.iov_base = (char *)dbuf;
   1427 		iov.iov_len = dlen;
   1428 		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
   1429 		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
   1430 		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
   1431 
   1432 		dbuflen = dlen - uio.uio_resid;
   1433 		if (error || dbuflen == 0)
   1434 			break;
   1435 
   1436 		if (!(ddv->sdev_flags & SDEV_BUILD))
   1437 			break;
   1438 
   1439 		for (dp = dbuf; ((intptr_t)dp <
   1440 		    (intptr_t)dbuf + dbuflen);
   1441 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
   1442 			nm = dp->d_name;
   1443 
   1444 			if (strcmp(nm, ".") == 0 ||
   1445 			    strcmp(nm, "..") == 0)
   1446 				continue;
   1447 
   1448 			vp = NULLVP;
   1449 			dv = sdev_cache_lookup(ddv, nm);
   1450 			if (dv) {
   1451 				if (dv->sdev_state != SDEV_ZOMBIE) {
   1452 					SDEV_SIMPLE_RELE(dv);
   1453 				} else {
   1454 					/*
   1455 					 * A ZOMBIE node may not have been
   1456 					 * cleaned up from the backing store,
   1457 					 * bypass this entry in this case,
   1458 					 * and clean it up from the directory
   1459 					 * cache if this is the last call.
   1460 					 */
   1461 					(void) sdev_dirdelete(ddv, dv);
   1462 				}
   1463 				continue;
   1464 			}
   1465 
   1466 			/* refill the cache if not already */
   1467 			error = devname_backstore_lookup(ddv, nm, &vp);
   1468 			if (error)
   1469 				continue;
   1470 
   1471 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
   1472 			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
   1473 			if (error)
   1474 				continue;
   1475 
   1476 			if (vattr.va_type == VLNK) {
   1477 				error = sdev_getlink(vp, &link);
   1478 				if (error) {
   1479 					continue;
   1480 				}
   1481 				ASSERT(link != NULL);
   1482 			}
   1483 
   1484 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
   1485 				rw_exit(&ddv->sdev_contents);
   1486 				rw_enter(&ddv->sdev_contents, RW_WRITER);
   1487 			}
   1488 			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
   1489 			    cred, SDEV_READY);
   1490 			rw_downgrade(&ddv->sdev_contents);
   1491 
   1492 			if (link != NULL) {
   1493 				kmem_free(link, strlen(link) + 1);
   1494 				link = NULL;
   1495 			}
   1496 
   1497 			if (!error) {
   1498 				ASSERT(dv);
   1499 				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
   1500 				SDEV_SIMPLE_RELE(dv);
   1501 			}
   1502 			vp = NULL;
   1503 			dv = NULL;
   1504 		}
   1505 	}
   1506 
   1507 done:
   1508 	VN_RELE(dirvp);
   1509 	kmem_free(dbuf, dlen);
   1510 
   1511 	return (error);
   1512 }
   1513 
   1514 void
   1515 sdev_filldir_dynamic(struct sdev_node *ddv)
   1516 {
   1517 	int error;
   1518 	int i;
   1519 	struct vattr vattr;
   1520 	struct vattr *vap = &vattr;
   1521 	char *nm = NULL;
   1522 	struct sdev_node *dv = NULL;
   1523 
   1524 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1525 	ASSERT((ddv->sdev_flags & SDEV_BUILD));
   1526 
   1527 	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
   1528 	gethrestime(&vap->va_atime);
   1529 	vap->va_mtime = vap->va_atime;
   1530 	vap->va_ctime = vap->va_atime;
   1531 	for (i = 0; vtab[i].vt_name != NULL; i++) {
   1532 		nm = vtab[i].vt_name;
   1533 		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1534 		dv = NULL;
   1535 		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
   1536 		    NULL, kcred, SDEV_READY);
   1537 		if (error) {
   1538 			cmn_err(CE_WARN, "%s/%s: error %d\n",
   1539 			    ddv->sdev_name, nm, error);
   1540 		} else {
   1541 			ASSERT(dv);
   1542 			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
   1543 			SDEV_SIMPLE_RELE(dv);
   1544 		}
   1545 	}
   1546 }
   1547 
   1548 /*
   1549  * Creating a backing store entry based on sdev_attr.
   1550  * This is called either as part of node creation in a persistent directory
   1551  * or from setattr/setsecattr to persist access attributes across reboot.
   1552  */
   1553 int
   1554 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
   1555 {
   1556 	int error = 0;
   1557 	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
   1558 	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
   1559 	struct vattr *vap = dv->sdev_attr;
   1560 	char *nm = dv->sdev_name;
   1561 	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
   1562 
   1563 	ASSERT(dv && dv->sdev_name && rdvp);
   1564 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
   1565 
   1566 lookup:
   1567 	/* try to find it in the backing store */
   1568 	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
   1569 	    NULL);
   1570 	if (error == 0) {
   1571 		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
   1572 			VN_HOLD(rrvp);
   1573 			VN_RELE(*rvp);
   1574 			*rvp = rrvp;
   1575 		}
   1576 
   1577 		kmem_free(dv->sdev_attr, sizeof (vattr_t));
   1578 		dv->sdev_attr = NULL;
   1579 		dv->sdev_attrvp = *rvp;
   1580 		return (0);
   1581 	}
   1582 
   1583 	/* let's try to persist the node */
   1584 	gethrestime(&vap->va_atime);
   1585 	vap->va_mtime = vap->va_atime;
   1586 	vap->va_ctime = vap->va_atime;
   1587 	vap->va_mask |= AT_TYPE|AT_MODE;
   1588 	switch (vap->va_type) {
   1589 	case VDIR:
   1590 		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
   1591 		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
   1592 		    (void *)(*rvp), error));
   1593 		break;
   1594 	case VCHR:
   1595 	case VBLK:
   1596 	case VREG:
   1597 	case VDOOR:
   1598 		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
   1599 		    rvp, cred, 0, NULL, NULL);
   1600 		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
   1601 		    (void *)(*rvp), error));
   1602 		if (!error)
   1603 			VN_RELE(*rvp);
   1604 		break;
   1605 	case VLNK:
   1606 		ASSERT(dv->sdev_symlink);
   1607 		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
   1608 		    NULL, 0);
   1609 		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
   1610 		    error));
   1611 		break;
   1612 	default:
   1613 		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
   1614 		    "create\n", nm);
   1615 		/*NOTREACHED*/
   1616 	}
   1617 
   1618 	/* go back to lookup to factor out spec node and set attrvp */
   1619 	if (error == 0)
   1620 		goto lookup;
   1621 
   1622 	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
   1623 	return (error);
   1624 }
   1625 
   1626 static int
   1627 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
   1628 {
   1629 	int error = 0;
   1630 	struct sdev_node *dup = NULL;
   1631 
   1632 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1633 	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
   1634 		sdev_direnter(ddv, *dv);
   1635 	} else {
   1636 		if (dup->sdev_state == SDEV_ZOMBIE) {
   1637 			error = sdev_dirdelete(ddv, dup);
   1638 			/*
   1639 			 * The ZOMBIE node is still hanging
   1640 			 * around with more than one reference counts.
   1641 			 * Fail the new node creation so that
   1642 			 * the directory cache won't have
   1643 			 * duplicate entries for the same named node
   1644 			 */
   1645 			if (error == EBUSY) {
   1646 				SDEV_SIMPLE_RELE(*dv);
   1647 				sdev_nodedestroy(*dv, 0);
   1648 				*dv = NULL;
   1649 				return (error);
   1650 			}
   1651 			sdev_direnter(ddv, *dv);
   1652 		} else {
   1653 			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
   1654 			SDEV_SIMPLE_RELE(*dv);
   1655 			sdev_nodedestroy(*dv, 0);
   1656 			*dv = dup;
   1657 		}
   1658 	}
   1659 
   1660 	return (0);
   1661 }
   1662 
   1663 static int
   1664 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
   1665 {
   1666 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1667 	return (sdev_dirdelete(ddv, *dv));
   1668 }
   1669 
   1670 /*
   1671  * update the in-core directory cache
   1672  */
   1673 int
   1674 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
   1675     sdev_cache_ops_t ops)
   1676 {
   1677 	int error = 0;
   1678 
   1679 	ASSERT((SDEV_HELD(*dv)));
   1680 
   1681 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1682 	switch (ops) {
   1683 	case SDEV_CACHE_ADD:
   1684 		error = sdev_cache_add(ddv, dv, nm);
   1685 		break;
   1686 	case SDEV_CACHE_DELETE:
   1687 		error = sdev_cache_delete(ddv, dv);
   1688 		break;
   1689 	default:
   1690 		break;
   1691 	}
   1692 
   1693 	return (error);
   1694 }
   1695 
   1696 /*
   1697  * retrieve the named entry from the directory cache
   1698  */
   1699 struct sdev_node *
   1700 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
   1701 {
   1702 	struct sdev_node *dv = NULL;
   1703 
   1704 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
   1705 	dv = sdev_findbyname(ddv, nm);
   1706 
   1707 	return (dv);
   1708 }
   1709 
   1710 /*
   1711  * Implicit reconfig for nodes constructed by a link generator
   1712  * Start devfsadm if needed, or if devfsadm is in progress,
   1713  * prepare to block on devfsadm either completing or
   1714  * constructing the desired node.  As devfsadmd is global
   1715  * in scope, constructing all necessary nodes, we only
   1716  * need to initiate it once.
   1717  */
   1718 static int
   1719 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
   1720 {
   1721 	int error = 0;
   1722 
   1723 	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
   1724 		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
   1725 		    ddv->sdev_name, nm, devfsadm_state));
   1726 		mutex_enter(&dv->sdev_lookup_lock);
   1727 		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
   1728 		mutex_exit(&dv->sdev_lookup_lock);
   1729 		error = 0;
   1730 	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
   1731 		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
   1732 		    ddv->sdev_name, nm, devfsadm_state));
   1733 
   1734 		sdev_devfsadmd_thread(ddv, dv, kcred);
   1735 		mutex_enter(&dv->sdev_lookup_lock);
   1736 		SDEV_BLOCK_OTHERS(dv,
   1737 		    (SDEV_LOOKUP | SDEV_LGWAITING));
   1738 		mutex_exit(&dv->sdev_lookup_lock);
   1739 		error = 0;
   1740 	} else {
   1741 		error = -1;
   1742 	}
   1743 
   1744 	return (error);
   1745 }
   1746 
   1747 /*
   1748  *  Support for specialized device naming construction mechanisms
   1749  */
   1750 static int
   1751 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
   1752     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
   1753     void *, char *), int flags, struct cred *cred)
   1754 {
   1755 	int rv = 0;
   1756 	char *physpath = NULL;
   1757 	struct vattr vattr;
   1758 	struct vattr *vap = &vattr;
   1759 	struct sdev_node *dv = NULL;
   1760 
   1761 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1762 	if (flags & SDEV_VLINK) {
   1763 		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1764 		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
   1765 		    NULL);
   1766 		if (rv) {
   1767 			kmem_free(physpath, MAXPATHLEN);
   1768 			return (-1);
   1769 		}
   1770 
   1771 		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
   1772 		vap->va_size = strlen(physpath);
   1773 		gethrestime(&vap->va_atime);
   1774 		vap->va_mtime = vap->va_atime;
   1775 		vap->va_ctime = vap->va_atime;
   1776 
   1777 		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
   1778 		    (void *)physpath, cred, SDEV_READY);
   1779 		kmem_free(physpath, MAXPATHLEN);
   1780 		if (rv)
   1781 			return (rv);
   1782 	} else if (flags & SDEV_VATTR) {
   1783 		/*
   1784 		 * /dev/pts
   1785 		 *
   1786 		 * callback is responsible to set the basic attributes,
   1787 		 * e.g. va_type/va_uid/va_gid/
   1788 		 *    dev_t if VCHR or VBLK/
   1789 		 */
   1790 		ASSERT(callback);
   1791 		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
   1792 		if (rv) {
   1793 			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
   1794 			    "callback failed \n"));
   1795 			return (-1);
   1796 		}
   1797 
   1798 		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
   1799 		    cred, SDEV_READY);
   1800 
   1801 		if (rv)
   1802 			return (rv);
   1803 
   1804 	} else {
   1805 		impossible(("lookup: %s/%s by %s not supported (%d)\n",
   1806 		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
   1807 		    __LINE__));
   1808 		rv = -1;
   1809 	}
   1810 
   1811 	*dvp = dv;
   1812 	return (rv);
   1813 }
   1814 
   1815 static int
   1816 is_devfsadm_thread(char *exec_name)
   1817 {
   1818 	/*
   1819 	 * note: because devfsadmd -> /usr/sbin/devfsadm
   1820 	 * it is safe to use "devfsadm" to capture the lookups
   1821 	 * from devfsadm and its daemon version.
   1822 	 */
   1823 	if (strcmp(exec_name, "devfsadm") == 0)
   1824 		return (1);
   1825 	return (0);
   1826 }
   1827 
   1828 /*
   1829  * Lookup Order:
   1830  *	sdev_node cache;
   1831  *	backing store (SDEV_PERSIST);
   1832  *	DBNR: a. dir_ops implemented in the loadable modules;
   1833  *	      b. vnode ops in vtab.
   1834  */
   1835 int
   1836 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
   1837     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
   1838     struct cred *, void *, char *), int flags)
   1839 {
   1840 	int rv = 0, nmlen;
   1841 	struct vnode *rvp = NULL;
   1842 	struct sdev_node *dv = NULL;
   1843 	int	retried = 0;
   1844 	int	error = 0;
   1845 	struct vattr vattr;
   1846 	char *lookup_thread = curproc->p_user.u_comm;
   1847 	int failed_flags = 0;
   1848 	int (*vtor)(struct sdev_node *) = NULL;
   1849 	int state;
   1850 	int parent_state;
   1851 	char *link = NULL;
   1852 
   1853 	if (SDEVTOV(ddv)->v_type != VDIR)
   1854 		return (ENOTDIR);
   1855 
   1856 	/*
   1857 	 * Empty name or ., return node itself.
   1858 	 */
   1859 	nmlen = strlen(nm);
   1860 	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
   1861 		*vpp = SDEVTOV(ddv);
   1862 		VN_HOLD(*vpp);
   1863 		return (0);
   1864 	}
   1865 
   1866 	/*
   1867 	 * .., return the parent directory
   1868 	 */
   1869 	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
   1870 		*vpp = SDEVTOV(ddv->sdev_dotdot);
   1871 		VN_HOLD(*vpp);
   1872 		return (0);
   1873 	}
   1874 
   1875 	rw_enter(&ddv->sdev_contents, RW_READER);
   1876 	if (ddv->sdev_flags & SDEV_VTOR) {
   1877 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
   1878 		ASSERT(vtor);
   1879 	}
   1880 
   1881 tryagain:
   1882 	/*
   1883 	 * (a) directory cache lookup:
   1884 	 */
   1885 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
   1886 	parent_state = ddv->sdev_state;
   1887 	dv = sdev_cache_lookup(ddv, nm);
   1888 	if (dv) {
   1889 		state = dv->sdev_state;
   1890 		switch (state) {
   1891 		case SDEV_INIT:
   1892 			if (is_devfsadm_thread(lookup_thread))
   1893 				break;
   1894 
   1895 			/* ZOMBIED parent won't allow node creation */
   1896 			if (parent_state == SDEV_ZOMBIE) {
   1897 				SD_TRACE_FAILED_LOOKUP(ddv, nm,
   1898 				    retried);
   1899 				goto nolock_notfound;
   1900 			}
   1901 
   1902 			mutex_enter(&dv->sdev_lookup_lock);
   1903 			/* compensate the threads started after devfsadm */
   1904 			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
   1905 			    !(SDEV_IS_LOOKUP(dv)))
   1906 				SDEV_BLOCK_OTHERS(dv,
   1907 				    (SDEV_LOOKUP | SDEV_LGWAITING));
   1908 
   1909 			if (SDEV_IS_LOOKUP(dv)) {
   1910 				failed_flags |= SLF_REBUILT;
   1911 				rw_exit(&ddv->sdev_contents);
   1912 				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
   1913 				mutex_exit(&dv->sdev_lookup_lock);
   1914 				rw_enter(&ddv->sdev_contents, RW_READER);
   1915 
   1916 				if (error != 0) {
   1917 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
   1918 					    retried);
   1919 					goto nolock_notfound;
   1920 				}
   1921 
   1922 				state = dv->sdev_state;
   1923 				if (state == SDEV_INIT) {
   1924 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
   1925 					    retried);
   1926 					goto nolock_notfound;
   1927 				} else if (state == SDEV_READY) {
   1928 					goto found;
   1929 				} else if (state == SDEV_ZOMBIE) {
   1930 					rw_exit(&ddv->sdev_contents);
   1931 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
   1932 					    retried);
   1933 					SDEV_RELE(dv);
   1934 					goto lookup_failed;
   1935 				}
   1936 			} else {
   1937 				mutex_exit(&dv->sdev_lookup_lock);
   1938 			}
   1939 			break;
   1940 		case SDEV_READY:
   1941 			goto found;
   1942 		case SDEV_ZOMBIE:
   1943 			rw_exit(&ddv->sdev_contents);
   1944 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   1945 			SDEV_RELE(dv);
   1946 			goto lookup_failed;
   1947 		default:
   1948 			rw_exit(&ddv->sdev_contents);
   1949 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   1950 			sdev_lookup_failed(ddv, nm, failed_flags);
   1951 			*vpp = NULLVP;
   1952 			return (ENOENT);
   1953 		}
   1954 	}
   1955 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
   1956 
   1957 	/*
   1958 	 * ZOMBIED parent does not allow new node creation.
   1959 	 * bail out early
   1960 	 */
   1961 	if (parent_state == SDEV_ZOMBIE) {
   1962 		rw_exit(&ddv->sdev_contents);
   1963 		*vpp = NULLVP;
   1964 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   1965 		return (ENOENT);
   1966 	}
   1967 
   1968 	/*
   1969 	 * (b0): backing store lookup
   1970 	 *	SDEV_PERSIST is default except:
   1971 	 *		1) pts nodes
   1972 	 *		2) non-chmod'ed local nodes
   1973 	 *		3) zvol nodes
   1974 	 */
   1975 	if (SDEV_IS_PERSIST(ddv)) {
   1976 		error = devname_backstore_lookup(ddv, nm, &rvp);
   1977 
   1978 		if (!error) {
   1979 
   1980 			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
   1981 			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
   1982 			if (error) {
   1983 				rw_exit(&ddv->sdev_contents);
   1984 				if (dv)
   1985 					SDEV_RELE(dv);
   1986 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   1987 				sdev_lookup_failed(ddv, nm, failed_flags);
   1988 				*vpp = NULLVP;
   1989 				return (ENOENT);
   1990 			}
   1991 
   1992 			if (vattr.va_type == VLNK) {
   1993 				error = sdev_getlink(rvp, &link);
   1994 				if (error) {
   1995 					rw_exit(&ddv->sdev_contents);
   1996 					if (dv)
   1997 						SDEV_RELE(dv);
   1998 					SD_TRACE_FAILED_LOOKUP(ddv, nm,
   1999 					    retried);
   2000 					sdev_lookup_failed(ddv, nm,
   2001 					    failed_flags);
   2002 					*vpp = NULLVP;
   2003 					return (ENOENT);
   2004 				}
   2005 				ASSERT(link != NULL);
   2006 			}
   2007 
   2008 			if (!rw_tryupgrade(&ddv->sdev_contents)) {
   2009 				rw_exit(&ddv->sdev_contents);
   2010 				rw_enter(&ddv->sdev_contents, RW_WRITER);
   2011 			}
   2012 			error = sdev_mknode(ddv, nm, &dv, &vattr,
   2013 			    rvp, link, cred, SDEV_READY);
   2014 			rw_downgrade(&ddv->sdev_contents);
   2015 
   2016 			if (link != NULL) {
   2017 				kmem_free(link, strlen(link) + 1);
   2018 				link = NULL;
   2019 			}
   2020 
   2021 			if (error) {
   2022 				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2023 				rw_exit(&ddv->sdev_contents);
   2024 				if (dv)
   2025 					SDEV_RELE(dv);
   2026 				goto lookup_failed;
   2027 			} else {
   2028 				goto found;
   2029 			}
   2030 		} else if (retried) {
   2031 			rw_exit(&ddv->sdev_contents);
   2032 			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
   2033 			    ddv->sdev_name, nm));
   2034 			if (dv)
   2035 				SDEV_RELE(dv);
   2036 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2037 			sdev_lookup_failed(ddv, nm, failed_flags);
   2038 			*vpp = NULLVP;
   2039 			return (ENOENT);
   2040 		}
   2041 	}
   2042 
   2043 lookup_create_node:
   2044 	/* first thread that is doing the lookup on this node */
   2045 	if (callback) {
   2046 		ASSERT(dv == NULL);
   2047 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
   2048 			rw_exit(&ddv->sdev_contents);
   2049 			rw_enter(&ddv->sdev_contents, RW_WRITER);
   2050 		}
   2051 		error = sdev_call_dircallback(ddv, &dv, nm, callback,
   2052 		    flags, cred);
   2053 		rw_downgrade(&ddv->sdev_contents);
   2054 		if (error == 0) {
   2055 			goto found;
   2056 		} else {
   2057 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2058 			rw_exit(&ddv->sdev_contents);
   2059 			goto lookup_failed;
   2060 		}
   2061 	}
   2062 	if (!dv) {
   2063 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
   2064 			rw_exit(&ddv->sdev_contents);
   2065 			rw_enter(&ddv->sdev_contents, RW_WRITER);
   2066 		}
   2067 		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
   2068 		    cred, SDEV_INIT);
   2069 		if (!dv) {
   2070 			rw_exit(&ddv->sdev_contents);
   2071 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2072 			sdev_lookup_failed(ddv, nm, failed_flags);
   2073 			*vpp = NULLVP;
   2074 			return (ENOENT);
   2075 		}
   2076 		rw_downgrade(&ddv->sdev_contents);
   2077 	}
   2078 
   2079 	/*
   2080 	 * (b1) invoking devfsadm once per life time for devfsadm nodes
   2081 	 */
   2082 	ASSERT(SDEV_HELD(dv));
   2083 
   2084 	if (SDEV_IS_NO_NCACHE(dv))
   2085 		failed_flags |= SLF_NO_NCACHE;
   2086 	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
   2087 	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
   2088 	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
   2089 		ASSERT(SDEV_HELD(dv));
   2090 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2091 		goto nolock_notfound;
   2092 	}
   2093 
   2094 	/*
   2095 	 * filter out known non-existent devices recorded
   2096 	 * during initial reconfiguration boot for which
   2097 	 * reconfig should not be done and lookup may
   2098 	 * be short-circuited now.
   2099 	 */
   2100 	if (sdev_lookup_filter(ddv, nm)) {
   2101 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2102 		goto nolock_notfound;
   2103 	}
   2104 
   2105 	/* bypassing devfsadm internal nodes */
   2106 	if (is_devfsadm_thread(lookup_thread)) {
   2107 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2108 		goto nolock_notfound;
   2109 	}
   2110 
   2111 	if (sdev_reconfig_disable) {
   2112 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2113 		goto nolock_notfound;
   2114 	}
   2115 
   2116 	error = sdev_call_devfsadmd(ddv, dv, nm);
   2117 	if (error == 0) {
   2118 		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
   2119 		    ddv->sdev_name, nm, curproc->p_user.u_comm));
   2120 		if (sdev_reconfig_verbose) {
   2121 			cmn_err(CE_CONT,
   2122 			    "?lookup of %s/%s by %s: reconfig\n",
   2123 			    ddv->sdev_name, nm, curproc->p_user.u_comm);
   2124 		}
   2125 		retried = 1;
   2126 		failed_flags |= SLF_REBUILT;
   2127 		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
   2128 		SDEV_SIMPLE_RELE(dv);
   2129 		goto tryagain;
   2130 	} else {
   2131 		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2132 		goto nolock_notfound;
   2133 	}
   2134 
   2135 found:
   2136 	ASSERT(!(dv->sdev_flags & SDEV_STALE));
   2137 	ASSERT(dv->sdev_state == SDEV_READY);
   2138 	if (vtor) {
   2139 		/*
   2140 		 * Check validity of returned node
   2141 		 */
   2142 		switch (vtor(dv)) {
   2143 		case SDEV_VTOR_VALID:
   2144 			break;
   2145 		case SDEV_VTOR_STALE:
   2146 			/*
   2147 			 * The name exists, but the cache entry is
   2148 			 * stale and needs to be re-created.
   2149 			 */
   2150 			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
   2151 			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
   2152 				rw_exit(&ddv->sdev_contents);
   2153 				rw_enter(&ddv->sdev_contents, RW_WRITER);
   2154 			}
   2155 			error = sdev_cache_update(ddv, &dv, nm,
   2156 			    SDEV_CACHE_DELETE);
   2157 			rw_downgrade(&ddv->sdev_contents);
   2158 			if (error == 0) {
   2159 				dv = NULL;
   2160 				goto lookup_create_node;
   2161 			}
   2162 			/* FALLTHRU */
   2163 		case SDEV_VTOR_INVALID:
   2164 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2165 			sdcmn_err7(("lookup: destroy invalid "
   2166 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
   2167 			goto nolock_notfound;
   2168 		case SDEV_VTOR_SKIP:
   2169 			sdcmn_err7(("lookup: node not applicable - "
   2170 			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
   2171 			rw_exit(&ddv->sdev_contents);
   2172 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
   2173 			SDEV_RELE(dv);
   2174 			goto lookup_failed;
   2175 		default:
   2176 			cmn_err(CE_PANIC,
   2177 			    "dev fs: validator failed: %s(%p)\n",
   2178 			    dv->sdev_name, (void *)dv);
   2179 			break;
   2180 		}
   2181 	}
   2182 
   2183 	rw_exit(&ddv->sdev_contents);
   2184 	rv = sdev_to_vp(dv, vpp);
   2185 	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
   2186 	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
   2187 	    dv->sdev_state, nm, rv));
   2188 	return (rv);
   2189 
   2190 nolock_notfound:
   2191 	/*
   2192 	 * Destroy the node that is created for synchronization purposes.
   2193 	 */
   2194 	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
   2195 	    nm, dv->sdev_state));
   2196 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
   2197 	if (dv->sdev_state == SDEV_INIT) {
   2198 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
   2199 			rw_exit(&ddv->sdev_contents);
   2200 			rw_enter(&ddv->sdev_contents, RW_WRITER);
   2201 		}
   2202 
   2203 		/*
   2204 		 * Node state may have changed during the lock
   2205 		 * changes. Re-check.
   2206 		 */
   2207 		if (dv->sdev_state == SDEV_INIT) {
   2208 			(void) sdev_dirdelete(ddv, dv);
   2209 			rw_exit(&ddv->sdev_contents);
   2210 			sdev_lookup_failed(ddv, nm, failed_flags);
   2211 			*vpp = NULL;
   2212 			return (ENOENT);
   2213 		}
   2214 	}
   2215 
   2216 	rw_exit(&ddv->sdev_contents);
   2217 	SDEV_RELE(dv);
   2218 
   2219 lookup_failed:
   2220 	sdev_lookup_failed(ddv, nm, failed_flags);
   2221 	*vpp = NULL;
   2222 	return (ENOENT);
   2223 }
   2224 
   2225 /*
   2226  * Given a directory node, mark all nodes beneath as
   2227  * STALE, i.e. nodes that don't exist as far as new
   2228  * consumers are concerned.  Remove them from the
   2229  * list of directory entries so that no lookup or
   2230  * directory traversal will find them.  The node
   2231  * not deallocated so existing holds are not affected.
   2232  */
   2233 void
   2234 sdev_stale(struct sdev_node *ddv)
   2235 {
   2236 	struct sdev_node *dv;
   2237 	struct vnode *vp;
   2238 
   2239 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
   2240 
   2241 	rw_enter(&ddv->sdev_contents, RW_WRITER);
   2242 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
   2243 		vp = SDEVTOV(dv);
   2244 		if (vp->v_type == VDIR)
   2245 			sdev_stale(dv);
   2246 
   2247 		sdcmn_err9(("sdev_stale: setting stale %s\n",
   2248 		    dv->sdev_path));
   2249 		dv->sdev_flags |= SDEV_STALE;
   2250 		avl_remove(&ddv->sdev_entries, dv);
   2251 	}
   2252 	ddv->sdev_flags |= SDEV_BUILD;
   2253 	rw_exit(&ddv->sdev_contents);
   2254 }
   2255 
   2256 /*
   2257  * Given a directory node, clean out all the nodes beneath.
   2258  * If expr is specified, clean node with names matching expr.
   2259  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
   2260  *	so they are excluded from future lookups.
   2261  */
   2262 int
   2263 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
   2264 {
   2265 	int error = 0;
   2266 	int busy = 0;
   2267 	struct vnode *vp;
   2268 	struct sdev_node *dv, *next = NULL;
   2269 	int bkstore = 0;
   2270 	int len = 0;
   2271 	char *bks_name = NULL;
   2272 
   2273 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
   2274 
   2275 	/*
   2276 	 * We try our best to destroy all unused sdev_node's
   2277 	 */
   2278 	rw_enter(&ddv->sdev_contents, RW_WRITER);
   2279 	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
   2280 		next = SDEV_NEXT_ENTRY(ddv, dv);
   2281 		vp = SDEVTOV(dv);
   2282 
   2283 		if (expr && gmatch(dv->sdev_name, expr) == 0)
   2284 			continue;
   2285 
   2286 		if (vp->v_type == VDIR &&
   2287 		    sdev_cleandir(dv, NULL, flags) != 0) {
   2288 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
   2289 			    dv->sdev_name));
   2290 			busy++;
   2291 			continue;
   2292 		}
   2293 
   2294 		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
   2295 			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
   2296 			    dv->sdev_name));
   2297 			busy++;
   2298 			continue;
   2299 		}
   2300 
   2301 		/*
   2302 		 * at this point, either dv is not held or SDEV_ENFORCE
   2303 		 * is specified. In either case, dv needs to be deleted
   2304 		 */
   2305 		SDEV_HOLD(dv);
   2306 
   2307 		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
   2308 		if (bkstore && (vp->v_type == VDIR))
   2309 			bkstore += 1;
   2310 
   2311 		if (bkstore) {
   2312 			len = strlen(dv->sdev_name) + 1;
   2313 			bks_name = kmem_alloc(len, KM_SLEEP);
   2314 			bcopy(dv->sdev_name, bks_name, len);
   2315 		}
   2316 
   2317 		error = sdev_dirdelete(ddv, dv);
   2318 
   2319 		if (error == EBUSY) {
   2320 			sdcmn_err9(("sdev_cleandir: dir busy\n"));
   2321 			busy++;
   2322 		}
   2323 
   2324 		/* take care the backing store clean up */
   2325 		if (bkstore && (error == 0)) {
   2326 			ASSERT(bks_name);
   2327 			ASSERT(ddv->sdev_attrvp);
   2328 
   2329 			if (bkstore == 1) {
   2330 				error = VOP_REMOVE(ddv->sdev_attrvp,
   2331 				    bks_name, kcred, NULL, 0);
   2332 			} else if (bkstore == 2) {
   2333 				error = VOP_RMDIR(ddv->sdev_attrvp,
   2334 				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
   2335 			}
   2336 
   2337 			/* do not propagate the backing store errors */
   2338 			if (error) {
   2339 				sdcmn_err9(("sdev_cleandir: backing store"
   2340 				    "not cleaned\n"));
   2341 				error = 0;
   2342 			}
   2343 
   2344 			bkstore = 0;
   2345 			kmem_free(bks_name, len);
   2346 			bks_name = NULL;
   2347 			len = 0;
   2348 		}
   2349 	}
   2350 
   2351 	ddv->sdev_flags |= SDEV_BUILD;
   2352 	rw_exit(&ddv->sdev_contents);
   2353 
   2354 	if (busy) {
   2355 		error = EBUSY;
   2356 	}
   2357 
   2358 	return (error);
   2359 }
   2360 
   2361 /*
   2362  * a convenient wrapper for readdir() funcs
   2363  */
   2364 size_t
   2365 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
   2366 {
   2367 	size_t reclen = DIRENT64_RECLEN(strlen(nm));
   2368 	if (reclen > size)
   2369 		return (0);
   2370 
   2371 	de->d_ino = (ino64_t)ino;
   2372 	de->d_off = (off64_t)off + 1;
   2373 	de->d_reclen = (ushort_t)reclen;
   2374 	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
   2375 	return (reclen);
   2376 }
   2377 
   2378 /*
   2379  * sdev_mount service routines
   2380  */
   2381 int
   2382 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
   2383 {
   2384 	int	error;
   2385 
   2386 	if (uap->datalen != sizeof (*args))
   2387 		return (EINVAL);
   2388 
   2389 	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
   2390 		cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
   2391 		    "get user data. error %d\n", error);
   2392 		return (EFAULT);
   2393 	}
   2394 
   2395 	return (0);
   2396 }
   2397 
   2398 #ifdef nextdp
   2399 #undef nextdp
   2400 #endif
   2401 #define	nextdp(dp)	((struct dirent64 *) \
   2402 			    (intptr_t)((char *)(dp) + (dp)->d_reclen))
   2403 
   2404 /*
   2405  * readdir helper func
   2406  */
   2407 int
   2408 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
   2409     int flags)
   2410 {
   2411 	struct sdev_node *ddv = VTOSDEV(vp);
   2412 	struct sdev_node *dv;
   2413 	dirent64_t	*dp;
   2414 	ulong_t		outcount = 0;
   2415 	size_t		namelen;
   2416 	ulong_t		alloc_count;
   2417 	void		*outbuf;
   2418 	struct iovec	*iovp;
   2419 	int		error = 0;
   2420 	size_t		reclen;
   2421 	offset_t	diroff;
   2422 	offset_t	soff;
   2423 	int		this_reclen;
   2424 	int (*vtor)(struct sdev_node *) = NULL;
   2425 	struct vattr attr;
   2426 	timestruc_t now;
   2427 
   2428 	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
   2429 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
   2430 
   2431 	if (uiop->uio_loffset >= MAXOFF_T) {
   2432 		if (eofp)
   2433 			*eofp = 1;
   2434 		return (0);
   2435 	}
   2436 
   2437 	if (uiop->uio_iovcnt != 1)
   2438 		return (EINVAL);
   2439 
   2440 	if (vp->v_type != VDIR)
   2441 		return (ENOTDIR);
   2442 
   2443 	if (ddv->sdev_flags & SDEV_VTOR) {
   2444 		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
   2445 		ASSERT(vtor);
   2446 	}
   2447 
   2448 	if (eofp != NULL)
   2449 		*eofp = 0;
   2450 
   2451 	soff = uiop->uio_loffset;
   2452 	iovp = uiop->uio_iov;
   2453 	alloc_count = iovp->iov_len;
   2454 	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
   2455 	outcount = 0;
   2456 
   2457 	if (ddv->sdev_state == SDEV_ZOMBIE)
   2458 		goto get_cache;
   2459 
   2460 	if (SDEV_IS_GLOBAL(ddv)) {
   2461 
   2462 		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
   2463 		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
   2464 		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
   2465 		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
   2466 		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
   2467 		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
   2468 		    !sdev_reconfig_disable) {
   2469 			/*
   2470 			 * invoking "devfsadm" to do system device reconfig
   2471 			 */
   2472 			mutex_enter(&ddv->sdev_lookup_lock);
   2473 			SDEV_BLOCK_OTHERS(ddv,
   2474 			    (SDEV_READDIR|SDEV_LGWAITING));
   2475 			mutex_exit(&ddv->sdev_lookup_lock);
   2476 
   2477 			sdcmn_err8(("readdir of %s by %s: reconfig\n",
   2478 			    ddv->sdev_path, curproc->p_user.u_comm));
   2479 			if (sdev_reconfig_verbose) {
   2480 				cmn_err(CE_CONT,
   2481 				    "?readdir of %s by %s: reconfig\n",
   2482 				    ddv->sdev_path, curproc->p_user.u_comm);
   2483 			}
   2484 
   2485 			sdev_devfsadmd_thread(ddv, NULL, kcred);
   2486 		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
   2487 			/*
   2488 			 * compensate the "ls" started later than "devfsadm"
   2489 			 */
   2490 			mutex_enter(&ddv->sdev_lookup_lock);
   2491 			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
   2492 			mutex_exit(&ddv->sdev_lookup_lock);
   2493 		}
   2494 
   2495 		/*
   2496 		 * release the contents lock so that
   2497 		 * the cache may be updated by devfsadmd
   2498 		 */
   2499 		rw_exit(&ddv->sdev_contents);
   2500 		mutex_enter(&ddv->sdev_lookup_lock);
   2501 		if (SDEV_IS_READDIR(ddv))
   2502 			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
   2503 		mutex_exit(&ddv->sdev_lookup_lock);
   2504 		rw_enter(&ddv->sdev_contents, RW_READER);
   2505 
   2506 		sdcmn_err4(("readdir of directory %s by %s\n",
   2507 		    ddv->sdev_name, curproc->p_user.u_comm));
   2508 		if (ddv->sdev_flags & SDEV_BUILD) {
   2509 			if (SDEV_IS_PERSIST(ddv)) {
   2510 				error = sdev_filldir_from_store(ddv,
   2511 				    alloc_count, cred);
   2512 			}
   2513 			ddv->sdev_flags &= ~SDEV_BUILD;
   2514 		}
   2515 	}
   2516 
   2517 get_cache:
   2518 	/* handle "." and ".." */
   2519 	diroff = 0;
   2520 	if (soff == 0) {
   2521 		/* first time */
   2522 		this_reclen = DIRENT64_RECLEN(1);
   2523 		if (alloc_count < this_reclen) {
   2524 			error = EINVAL;
   2525 			goto done;
   2526 		}
   2527 
   2528 		dp->d_ino = (ino64_t)ddv->sdev_ino;
   2529 		dp->d_off = (off64_t)1;
   2530 		dp->d_reclen = (ushort_t)this_reclen;
   2531 
   2532 		(void) strncpy(dp->d_name, ".",
   2533 		    DIRENT64_NAMELEN(this_reclen));
   2534 		outcount += dp->d_reclen;
   2535 		dp = nextdp(dp);
   2536 	}
   2537 
   2538 	diroff++;
   2539 	if (soff <= 1) {
   2540 		this_reclen = DIRENT64_RECLEN(2);
   2541 		if (alloc_count < outcount + this_reclen) {
   2542 			error = EINVAL;
   2543 			goto done;
   2544 		}
   2545 
   2546 		dp->d_reclen = (ushort_t)this_reclen;
   2547 		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
   2548 		dp->d_off = (off64_t)2;
   2549 
   2550 		(void) strncpy(dp->d_name, "..",
   2551 		    DIRENT64_NAMELEN(this_reclen));
   2552 		outcount += dp->d_reclen;
   2553 
   2554 		dp = nextdp(dp);
   2555 	}
   2556 
   2557 
   2558 	/* gets the cache */
   2559 	diroff++;
   2560 	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
   2561 	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
   2562 		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
   2563 		    diroff, soff, dv->sdev_name));
   2564 
   2565 		/* bypassing pre-matured nodes */
   2566 		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
   2567 			sdcmn_err3(("sdev_readdir: pre-mature node  "
   2568 			    "%s %d\n", dv->sdev_name, dv->sdev_state));
   2569 			continue;
   2570 		}
   2571 
   2572 		/*
   2573 		 * Check validity of node
   2574 		 */
   2575 		if (vtor) {
   2576 			switch (vtor(dv)) {
   2577 			case SDEV_VTOR_VALID:
   2578 				break;
   2579 			case SDEV_VTOR_INVALID:
   2580 			case SDEV_VTOR_SKIP:
   2581 				continue;
   2582 			default:
   2583 				cmn_err(CE_PANIC,
   2584 				    "dev fs: validator failed: %s(%p)\n",
   2585 				    dv->sdev_name, (void *)dv);
   2586 				break;
   2587 			/*NOTREACHED*/
   2588 			}
   2589 		}
   2590 
   2591 		namelen = strlen(dv->sdev_name);
   2592 		reclen = DIRENT64_RECLEN(namelen);
   2593 		if (outcount + reclen > alloc_count) {
   2594 			goto full;
   2595 		}
   2596 		dp->d_reclen = (ushort_t)reclen;
   2597 		dp->d_ino = (ino64_t)dv->sdev_ino;
   2598 		dp->d_off = (off64_t)diroff + 1;
   2599 		(void) strncpy(dp->d_name, dv->sdev_name,
   2600 		    DIRENT64_NAMELEN(reclen));
   2601 		outcount += reclen;
   2602 		dp = nextdp(dp);
   2603 	}
   2604 
   2605 full:
   2606 	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
   2607 	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
   2608 	    (void *)dv));
   2609 
   2610 	if (outcount)
   2611 		error = uiomove(outbuf, outcount, UIO_READ, uiop);
   2612 
   2613 	if (!error) {
   2614 		uiop->uio_loffset = diroff;
   2615 		if (eofp)
   2616 			*eofp = dv ? 0 : 1;
   2617 	}
   2618 
   2619 
   2620 	if (ddv->sdev_attrvp) {
   2621 		gethrestime(&now);
   2622 		attr.va_ctime = now;
   2623 		attr.va_atime = now;
   2624 		attr.va_mask = AT_CTIME|AT_ATIME;
   2625 
   2626 		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
   2627 	}
   2628 done:
   2629 	kmem_free(outbuf, alloc_count);
   2630 	return (error);
   2631 }
   2632 
   2633 static int
   2634 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
   2635 {
   2636 	vnode_t *vp;
   2637 	vnode_t *cvp;
   2638 	struct sdev_node *svp;
   2639 	char *nm;
   2640 	struct pathname pn;
   2641 	int error;
   2642 	int persisted = 0;
   2643 
   2644 	ASSERT(INGLOBALZONE(curproc));
   2645 
   2646 	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
   2647 		return (error);
   2648 	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
   2649 
   2650 	vp = rootdir;
   2651 	VN_HOLD(vp);
   2652 
   2653 	while (pn_pathleft(&pn)) {
   2654 		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
   2655 		(void) pn_getcomponent(&pn, nm);
   2656 
   2657 		/*
   2658 		 * Deal with the .. special case where we may be
   2659 		 * traversing up across a mount point, to the
   2660 		 * root of this filesystem or global root.
   2661 		 */
   2662 		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
   2663 checkforroot:
   2664 			if (VN_CMP(vp, rootdir)) {
   2665 				nm[1] = 0;
   2666 			} else if (vp->v_flag & VROOT) {
   2667 				vfs_t *vfsp;
   2668 				cvp = vp;
   2669 				vfsp = cvp->v_vfsp;
   2670 				vfs_rlock_wait(vfsp);
   2671 				vp = cvp->v_vfsp->vfs_vnodecovered;
   2672 				if (vp == NULL ||
   2673 				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
   2674 					vfs_unlock(vfsp);
   2675 					VN_RELE(cvp);
   2676 					error = EIO;
   2677 					break;
   2678 				}
   2679 				VN_HOLD(vp);
   2680 				vfs_unlock(vfsp);
   2681 				VN_RELE(cvp);
   2682 				cvp = NULL;
   2683 				goto checkforroot;
   2684 			}
   2685 		}
   2686 
   2687 		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
   2688 		    NULL, NULL);
   2689 		if (error) {
   2690 			VN_RELE(vp);
   2691 			break;
   2692 		}
   2693 
   2694 		/* traverse mount points encountered on our journey */
   2695 		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
   2696 			VN_RELE(vp);
   2697 			VN_RELE(cvp);
   2698 			break;
   2699 		}
   2700 
   2701 		/*
   2702 		 * symbolic link, can be either relative and absolute
   2703 		 */
   2704 		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
   2705 			struct pathname linkpath;
   2706 			pn_alloc(&linkpath);
   2707 			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
   2708 				pn_free(&linkpath);
   2709 				break;
   2710 			}
   2711 			if (pn_pathleft(&linkpath) == 0)
   2712 				(void) pn_set(&linkpath, ".");
   2713 			error = pn_insert(&pn, &linkpath, strlen(nm));
   2714 			pn_free(&linkpath);
   2715 			if (pn.pn_pathlen == 0) {
   2716 				VN_RELE(vp);
   2717 				return (ENOENT);
   2718 			}
   2719 			if (pn.pn_path[0] == '/') {
   2720 				pn_skipslash(&pn);
   2721 				VN_RELE(vp);
   2722 				VN_RELE(cvp);
   2723 				vp = rootdir;
   2724 				VN_HOLD(vp);
   2725 			} else {
   2726 				VN_RELE(cvp);
   2727 			}
   2728 			continue;
   2729 		}
   2730 
   2731 		VN_RELE(vp);
   2732 
   2733 		/*
   2734 		 * Direct the operation to the persisting filesystem
   2735 		 * underlying /dev.  Bail if we encounter a
   2736 		 * non-persistent dev entity here.
   2737 		 */
   2738 		if (cvp->v_vfsp->vfs_fstype == devtype) {
   2739 
   2740 			if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
   2741 				error = ENOENT;
   2742 				VN_RELE(cvp);
   2743 				break;
   2744 			}
   2745 
   2746 			if (VTOSDEV(cvp) == NULL) {
   2747 				error = ENOENT;
   2748 				VN_RELE(cvp);
   2749 				break;
   2750 			}
   2751 			svp = VTOSDEV(cvp);
   2752 			if ((vp = svp->sdev_attrvp) == NULL) {
   2753 				error = ENOENT;
   2754 				VN_RELE(cvp);
   2755 				break;
   2756 			}
   2757 			persisted = 1;
   2758 			VN_HOLD(vp);
   2759 			VN_RELE(cvp);
   2760 			cvp = vp;
   2761 		}
   2762 
   2763 		vp = cvp;
   2764 		pn_skipslash(&pn);
   2765 	}
   2766 
   2767 	kmem_free(nm, MAXNAMELEN);
   2768 	pn_free(&pn);
   2769 
   2770 	if (error)
   2771 		return (error);
   2772 
   2773 	/*
   2774 	 * Only return persisted nodes in the filesystem underlying /dev.
   2775 	 */
   2776 	if (!persisted) {
   2777 		VN_RELE(vp);
   2778 		return (ENOENT);
   2779 	}
   2780 
   2781 	*r_vp = vp;
   2782 	return (0);
   2783 }
   2784 
   2785 int
   2786 sdev_modctl_readdir(const char *dir, char ***dirlistp,
   2787 	int *npathsp, int *npathsp_alloc, int checking_empty)
   2788 {
   2789 	char	**pathlist = NULL;
   2790 	char	**newlist = NULL;
   2791 	int	npaths = 0;
   2792 	int	npaths_alloc = 0;
   2793 	dirent64_t *dbuf = NULL;
   2794 	int	n;
   2795 	char	*s;
   2796 	int error;
   2797 	vnode_t *vp;
   2798 	int eof;
   2799 	struct iovec iov;
   2800 	struct uio uio;
   2801 	struct dirent64 *dp;
   2802 	size_t dlen;
   2803 	size_t dbuflen;
   2804 	int ndirents = 64;
   2805 	char *nm;
   2806 
   2807 	error = sdev_modctl_lookup(dir, &vp);
   2808 	sdcmn_err11(("modctl readdir: %s by %s: %s\n",
   2809 	    dir, curproc->p_user.u_comm,
   2810 	    (error == 0) ? "ok" : "failed"));
   2811 	if (error)
   2812 		return (error);
   2813 
   2814 	dlen = ndirents * (sizeof (*dbuf));
   2815 	dbuf = kmem_alloc(dlen, KM_SLEEP);
   2816 
   2817 	uio.uio_iov = &iov;
   2818 	uio.uio_iovcnt = 1;
   2819 	uio.uio_segflg = UIO_SYSSPACE;
   2820 	uio.uio_fmode = 0;
   2821 	uio.uio_extflg = UIO_COPY_CACHED;
   2822 	uio.uio_loffset = 0;
   2823 	uio.uio_llimit = MAXOFFSET_T;
   2824 
   2825 	eof = 0;
   2826 	error = 0;
   2827 	while (!error && !eof) {
   2828 		uio.uio_resid = dlen;
   2829 		iov.iov_base = (char *)dbuf;
   2830 		iov.iov_len = dlen;
   2831 
   2832 		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
   2833 		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
   2834 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
   2835 
   2836 		dbuflen = dlen - uio.uio_resid;
   2837 
   2838 		if (error || dbuflen == 0)
   2839 			break;
   2840 
   2841 		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
   2842 		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
   2843 
   2844 			nm = dp->d_name;
   2845 
   2846 			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
   2847 				continue;
   2848 			if (npaths == npaths_alloc) {
   2849 				npaths_alloc += 64;
   2850 				newlist = (char **)
   2851 				    kmem_zalloc((npaths_alloc + 1) *
   2852 				    sizeof (char *), KM_SLEEP);
   2853 				if (pathlist) {
   2854 					bcopy(pathlist, newlist,
   2855 					    npaths * sizeof (char *));
   2856 					kmem_free(pathlist,
   2857 					    (npaths + 1) * sizeof (char *));
   2858 				}
   2859 				pathlist = newlist;
   2860 			}
   2861 			n = strlen(nm) + 1;
   2862 			s = kmem_alloc(n, KM_SLEEP);
   2863 			bcopy(nm, s, n);
   2864 			pathlist[npaths++] = s;
   2865 			sdcmn_err11(("  %s/%s\n", dir, s));
   2866 
   2867 			/* if checking empty, one entry is as good as many */
   2868 			if (checking_empty) {
   2869 				eof = 1;
   2870 				break;
   2871 			}
   2872 		}
   2873 	}
   2874 
   2875 exit:
   2876 	VN_RELE(vp);
   2877 
   2878 	if (dbuf)
   2879 		kmem_free(dbuf, dlen);
   2880 
   2881 	if (error)
   2882 		return (error);
   2883 
   2884 	*dirlistp = pathlist;
   2885 	*npathsp = npaths;
   2886 	*npathsp_alloc = npaths_alloc;
   2887 
   2888 	return (0);
   2889 }
   2890 
   2891 void
   2892 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
   2893 {
   2894 	int	i, n;
   2895 
   2896 	for (i = 0; i < npaths; i++) {
   2897 		n = strlen(pathlist[i]) + 1;
   2898 		kmem_free(pathlist[i], n);
   2899 	}
   2900 
   2901 	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
   2902 }
   2903 
   2904 int
   2905 sdev_modctl_devexists(const char *path)
   2906 {
   2907 	vnode_t *vp;
   2908 	int error;
   2909 
   2910 	error = sdev_modctl_lookup(path, &vp);
   2911 	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
   2912 	    path, curproc->p_user.u_comm,
   2913 	    (error == 0) ? "ok" : "failed"));
   2914 	if (error == 0)
   2915 		VN_RELE(vp);
   2916 
   2917 	return (error);
   2918 }
   2919 
   2920 extern int sdev_vnodeops_tbl_size;
   2921 
   2922 /*
   2923  * construct a new template with overrides from vtab
   2924  */
   2925 static fs_operation_def_t *
   2926 sdev_merge_vtab(const fs_operation_def_t tab[])
   2927 {
   2928 	fs_operation_def_t *new;
   2929 	const fs_operation_def_t *tab_entry;
   2930 
   2931 	/* make a copy of standard vnode ops table */
   2932 	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
   2933 	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
   2934 
   2935 	/* replace the overrides from tab */
   2936 	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
   2937 		fs_operation_def_t *std_entry = new;
   2938 		while (std_entry->name) {
   2939 			if (strcmp(tab_entry->name, std_entry->name) == 0) {
   2940 				std_entry->func = tab_entry->func;
   2941 				break;
   2942 			}
   2943 			std_entry++;
   2944 		}
   2945 		if (std_entry->name == NULL)
   2946 			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
   2947 			    tab_entry->name);
   2948 	}
   2949 
   2950 	return (new);
   2951 }
   2952 
   2953 /* free memory allocated by sdev_merge_vtab */
   2954 static void
   2955 sdev_free_vtab(fs_operation_def_t *new)
   2956 {
   2957 	kmem_free(new, sdev_vnodeops_tbl_size);
   2958 }
   2959 
   2960 /*
   2961  * a generic setattr() function
   2962  *
   2963  * note: flags only supports AT_UID and AT_GID.
   2964  *	 Future enhancements can be done for other types, e.g. AT_MODE
   2965  */
   2966 int
   2967 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
   2968     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
   2969     int), int protocol)
   2970 {
   2971 	struct sdev_node	*dv = VTOSDEV(vp);
   2972 	struct sdev_node	*parent = dv->sdev_dotdot;
   2973 	struct vattr		*get;
   2974 	uint_t			mask = vap->va_mask;
   2975 	int 			error;
   2976 
   2977 	/* some sanity checks */
   2978 	if (vap->va_mask & AT_NOSET)
   2979 		return (EINVAL);
   2980 
   2981 	if (vap->va_mask & AT_SIZE) {
   2982 		if (vp->v_type == VDIR) {
   2983 			return (EISDIR);
   2984 		}
   2985 	}
   2986 
   2987 	/* no need to set attribute, but do not fail either */
   2988 	ASSERT(parent);
   2989 	rw_enter(&parent->sdev_contents, RW_READER);
   2990 	if (dv->sdev_state == SDEV_ZOMBIE) {
   2991 		rw_exit(&parent->sdev_contents);
   2992 		return (0);
   2993 	}
   2994 
   2995 	/* If backing store exists, just set it. */
   2996 	if (dv->sdev_attrvp) {
   2997 		rw_exit(&parent->sdev_contents);
   2998 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
   2999 	}
   3000 
   3001 	/*
   3002 	 * Otherwise, for nodes with the persistence attribute, create it.
   3003 	 */
   3004 	ASSERT(dv->sdev_attr);
   3005 	if (SDEV_IS_PERSIST(dv) ||
   3006 	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
   3007 		sdev_vattr_merge(dv, vap);
   3008 		rw_enter(&dv->sdev_contents, RW_WRITER);
   3009 		error = sdev_shadow_node(dv, cred);
   3010 		rw_exit(&dv->sdev_contents);
   3011 		rw_exit(&parent->sdev_contents);
   3012 
   3013 		if (error)
   3014 			return (error);
   3015 		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
   3016 	}
   3017 
   3018 
   3019 	/*
   3020 	 * sdev_attr was allocated in sdev_mknode
   3021 	 */
   3022 	rw_enter(&dv->sdev_contents, RW_WRITER);
   3023 	error = secpolicy_vnode_setattr(cred, vp, vap,
   3024 	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
   3025 	if (error) {
   3026 		rw_exit(&dv->sdev_contents);
   3027 		rw_exit(&parent->sdev_contents);
   3028 		return (error);
   3029 	}
   3030 
   3031 	get = dv->sdev_attr;
   3032 	if (mask & AT_MODE) {
   3033 		get->va_mode &= S_IFMT;
   3034 		get->va_mode |= vap->va_mode & ~S_IFMT;
   3035 	}
   3036 
   3037 	if ((mask & AT_UID) || (mask & AT_GID)) {
   3038 		if (mask & AT_UID)
   3039 			get->va_uid = vap->va_uid;
   3040 		if (mask & AT_GID)
   3041 			get->va_gid = vap->va_gid;
   3042 		/*
   3043 		 * a callback must be provided if the protocol is set
   3044 		 */
   3045 		if ((protocol & AT_UID) || (protocol & AT_GID)) {
   3046 			ASSERT(callback);
   3047 			error = callback(dv, get, protocol);
   3048 			if (error) {
   3049 				rw_exit(&dv->sdev_contents);
   3050 				rw_exit(&parent->sdev_contents);
   3051 				return (error);
   3052 			}
   3053 		}
   3054 	}
   3055 
   3056 	if (mask & AT_ATIME)
   3057 		get->va_atime = vap->va_atime;
   3058 	if (mask & AT_MTIME)
   3059 		get->va_mtime = vap->va_mtime;
   3060 	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
   3061 		gethrestime(&get->va_ctime);
   3062 	}
   3063 
   3064 	sdev_vattr_merge(dv, get);
   3065 	rw_exit(&dv->sdev_contents);
   3066 	rw_exit(&parent->sdev_contents);
   3067 	return (0);
   3068 }
   3069 
   3070 /*
   3071  * a generic inactive() function
   3072  */
   3073 /*ARGSUSED*/
   3074 void
   3075 devname_inactive_func(struct vnode *vp, struct cred *cred,
   3076     void (*callback)(struct vnode *))
   3077 {
   3078 	int clean;
   3079 	struct sdev_node *dv = VTOSDEV(vp);
   3080 	struct sdev_node *ddv = dv->sdev_dotdot;
   3081 	int state;
   3082 
   3083 	rw_enter(&ddv->sdev_contents, RW_WRITER);
   3084 	state = dv->sdev_state;
   3085 
   3086 	mutex_enter(&vp->v_lock);
   3087 	ASSERT(vp->v_count >= 1);
   3088 
   3089 	if (vp->v_count == 1 && callback != NULL)
   3090 		callback(vp);
   3091 
   3092 	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
   3093 
   3094 	/*
   3095 	 * last ref count on the ZOMBIE node is released.
   3096 	 * clean up the sdev_node, and
   3097 	 * release the hold on the backing store node so that
   3098 	 * the ZOMBIE backing stores also cleaned out.
   3099 	 */
   3100 	if (clean) {
   3101 		ASSERT(ddv);
   3102 
   3103 		ddv->sdev_nlink--;
   3104 		if (vp->v_type == VDIR) {
   3105 			dv->sdev_nlink--;
   3106 		}
   3107 		if ((dv->sdev_flags & SDEV_STALE) == 0)
   3108 			avl_remove(&ddv->sdev_entries, dv);
   3109 		dv->sdev_nlink--;
   3110 		--vp->v_count;
   3111 		mutex_exit(&vp->v_lock);
   3112 		sdev_nodedestroy(dv, 0);
   3113 	} else {
   3114 		--vp->v_count;
   3115 		mutex_exit(&vp->v_lock);
   3116 	}
   3117 	rw_exit(&ddv->sdev_contents);
   3118 }
   3119