Home | History | Annotate | Download | only in dev
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * utility routines for the /dev fs
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/t_lock.h>
     33 #include <sys/systm.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/user.h>
     36 #include <sys/time.h>
     37 #include <sys/vfs.h>
     38 #include <sys/vnode.h>
     39 #include <sys/file.h>
     40 #include <sys/fcntl.h>
     41 #include <sys/flock.h>
     42 #include <sys/kmem.h>
     43 #include <sys/uio.h>
     44 #include <sys/errno.h>
     45 #include <sys/stat.h>
     46 #include <sys/cred.h>
     47 #include <sys/dirent.h>
     48 #include <sys/pathname.h>
     49 #include <sys/cmn_err.h>
     50 #include <sys/debug.h>
     51 #include <sys/mode.h>
     52 #include <sys/policy.h>
     53 #include <fs/fs_subr.h>
     54 #include <sys/mount.h>
     55 #include <sys/fs/snode.h>
     56 #include <sys/fs/dv_node.h>
     57 #include <sys/fs/sdev_impl.h>
     58 #include <sys/fs/sdev_node.h>
     59 #include <sys/sunndi.h>
     60 #include <sys/sunmdi.h>
     61 #include <sys/conf.h>
     62 #include <sys/proc.h>
     63 #include <sys/user.h>
     64 #include <sys/modctl.h>
     65 
     66 #ifdef DEBUG
     67 int sdev_debug = 0x00000001;
     68 int sdev_debug_cache_flags = 0;
     69 #endif
     70 
     71 /*
     72  * globals
     73  */
     74 /* prototype memory vattrs */
     75 vattr_t sdev_vattr_dir = {
     76 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
     77 	VDIR,					/* va_type */
     78 	SDEV_DIRMODE_DEFAULT,			/* va_mode */
     79 	SDEV_UID_DEFAULT,			/* va_uid */
     80 	SDEV_GID_DEFAULT,			/* va_gid */
     81 	0,					/* va_fsid */
     82 	0,					/* va_nodeid */
     83 	0,					/* va_nlink */
     84 	0,					/* va_size */
     85 	0,					/* va_atime */
     86 	0,					/* va_mtime */
     87 	0,					/* va_ctime */
     88 	0,					/* va_rdev */
     89 	0,					/* va_blksize */
     90 	0,					/* va_nblocks */
     91 	0					/* va_vcode */
     92 };
     93 
     94 vattr_t sdev_vattr_lnk = {
     95 	AT_TYPE|AT_MODE,			/* va_mask */
     96 	VLNK,					/* va_type */
     97 	SDEV_LNKMODE_DEFAULT,			/* va_mode */
     98 	SDEV_UID_DEFAULT,			/* va_uid */
     99 	SDEV_GID_DEFAULT,			/* va_gid */
    100 	0,					/* va_fsid */
    101 	0,					/* va_nodeid */
    102 	0,					/* va_nlink */
    103 	0,					/* va_size */
    104 	0,					/* va_atime */
    105 	0,					/* va_mtime */
    106 	0,					/* va_ctime */
    107 	0,					/* va_rdev */
    108 	0,					/* va_blksize */
    109 	0,					/* va_nblocks */
    110 	0					/* va_vcode */
    111 };
    112 
    113 vattr_t sdev_vattr_blk = {
    114 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
    115 	VBLK,					/* va_type */
    116 	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
    117 	SDEV_UID_DEFAULT,			/* va_uid */
    118 	SDEV_GID_DEFAULT,			/* va_gid */
    119 	0,					/* va_fsid */
    120 	0,					/* va_nodeid */
    121 	0,					/* va_nlink */
    122 	0,					/* va_size */
    123 	0,					/* va_atime */
    124 	0,					/* va_mtime */
    125 	0,					/* va_ctime */
    126 	0,					/* va_rdev */
    127 	0,					/* va_blksize */
    128 	0,					/* va_nblocks */
    129 	0					/* va_vcode */
    130 };
    131 
    132 vattr_t sdev_vattr_chr = {
    133 	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
    134 	VCHR,					/* va_type */
    135 	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
    136 	SDEV_UID_DEFAULT,			/* va_uid */
    137 	SDEV_GID_DEFAULT,			/* va_gid */
    138 	0,					/* va_fsid */
    139 	0,					/* va_nodeid */
    140 	0,					/* va_nlink */
    141 	0,					/* va_size */
    142 	0,					/* va_atime */
    143 	0,					/* va_mtime */
    144 	0,					/* va_ctime */
    145 	0,					/* va_rdev */
    146 	0,					/* va_blksize */
    147 	0,					/* va_nblocks */
    148 	0					/* va_vcode */
    149 };
    150 
    151 kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
    152 int		devtype;		/* fstype */
    153 
    154 struct devname_ops *devname_ns_ops;	/* default name service directory ops */
    155 kmutex_t devname_nsmaps_lock;	/* protect devname_nsmaps */
    156 
    157 /* static */
    158 static struct devname_nsmap *devname_nsmaps = NULL;
    159 				/* contents from /etc/dev/devname_master */
    160 static int devname_nsmaps_invalidated = 0; /* "devfsadm -m" has run */
    161 
    162 static struct vnodeops *sdev_get_vop(struct sdev_node *);
    163 static void sdev_set_no_nocache(struct sdev_node *);
    164 static int sdev_get_moduleops(struct sdev_node *);
    165 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
    166 static void sdev_free_vtab(fs_operation_def_t *);
    167 
    168 static void
    169 sdev_prof_free(struct sdev_node *dv)
    170 {
    171 	ASSERT(!SDEV_IS_GLOBAL(dv));
    172 	if (dv->sdev_prof.dev_name)
    173 		nvlist_free(dv->sdev_prof.dev_name);
    174 	if (dv->sdev_prof.dev_map)
    175 		nvlist_free(dv->sdev_prof.dev_map);
    176 	if (dv->sdev_prof.dev_symlink)
    177 		nvlist_free(dv->sdev_prof.dev_symlink);
    178 	if (dv->sdev_prof.dev_glob_incdir)
    179 		nvlist_free(dv->sdev_prof.dev_glob_incdir);
    180 	if (dv->sdev_prof.dev_glob_excdir)
    181 		nvlist_free(dv->sdev_prof.dev_glob_excdir);
    182 	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    183 }
    184 
    185 /* sdev_node cache constructor */
    186 /*ARGSUSED1*/
    187 static int
    188 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
    189 {
    190 	struct sdev_node *dv = (struct sdev_node *)buf;
    191 	struct vnode *vp;
    192 
    193 	bzero(buf, sizeof (struct sdev_node));
    194 	vp = dv->sdev_vnode = vn_alloc(flag);
    195 	if (vp == NULL) {
    196 		return (-1);
    197 	}
    198 	vp->v_data = dv;
    199 	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
    200 	return (0);
    201 }
    202 
    203 /* sdev_node cache destructor */
    204 /*ARGSUSED1*/
    205 static void
    206 i_sdev_node_dtor(void *buf, void *arg)
    207 {
    208 	struct sdev_node *dv = (struct sdev_node *)buf;
    209 	struct vnode *vp = SDEVTOV(dv);
    210 
    211 	rw_destroy(&dv->sdev_contents);
    212 	vn_free(vp);
    213 }
    214 
    215 /* initialize sdev_node cache */
    216 void
    217 sdev_node_cache_init()
    218 {
    219 	int flags = 0;
    220 
    221 #ifdef	DEBUG
    222 	flags = sdev_debug_cache_flags;
    223 	if (flags)
    224 		sdcmn_err(("cache debug flags 0x%x\n", flags));
    225 #endif	/* DEBUG */
    226 
    227 	ASSERT(sdev_node_cache == NULL);
    228 	sdev_node_cache = kmem_cache_create("sdev_node_cache",
    229 	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
    230 	    NULL, NULL, NULL, flags);
    231 }
    232 
    233 /* destroy sdev_node cache */
    234 void
    235 sdev_node_cache_fini()
    236 {
    237 	ASSERT(sdev_node_cache != NULL);
    238 	kmem_cache_destroy(sdev_node_cache);
    239 	sdev_node_cache = NULL;
    240 }
    241 
    242 /*
    243  * Compare two nodes lexographically to balance avl tree
    244  */
    245 static int
    246 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
    247 {
    248 	int rv;
    249 	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
    250 		return (0);
    251 	return ((rv < 0) ? -1 : 1);
    252 }
    253 
    254 void
    255 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
    256 {
    257 	ASSERT(dv);
    258 	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
    259 	dv->sdev_state = state;
    260 }
    261 
    262 static void
    263 sdev_attrinit(struct sdev_node *dv, vattr_t *vap)
    264 {
    265 	timestruc_t now;
    266 
    267 	ASSERT(vap);
    268 
    269 	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
    270 	*dv->sdev_attr = *vap;
    271 
    272 	dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode);
    273 
    274 	gethrestime(&now);
    275 	dv->sdev_attr->va_atime = now;
    276 	dv->sdev_attr->va_mtime = now;
    277 	dv->sdev_attr->va_ctime = now;
    278 }
    279 
    280 /* alloc and initialize a sdev_node */
    281 int
    282 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    283     vattr_t *vap)
    284 {
    285 	struct sdev_node *dv = NULL;
    286 	struct vnode *vp;
    287 	size_t nmlen, len;
    288 	devname_handle_t  *dhl;
    289 
    290 	nmlen = strlen(nm) + 1;
    291 	if (nmlen > MAXNAMELEN) {
    292 		sdcmn_err9(("sdev_nodeinit: node name %s"
    293 		    " too long\n", nm));
    294 		*newdv = NULL;
    295 		return (ENAMETOOLONG);
    296 	}
    297 
    298 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
    299 
    300 	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
    301 	bcopy(nm, dv->sdev_name, nmlen);
    302 	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
    303 	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
    304 	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
    305 	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
    306 	/* overwritten for VLNK nodes */
    307 	dv->sdev_symlink = NULL;
    308 
    309 	vp = SDEVTOV(dv);
    310 	vn_reinit(vp);
    311 	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
    312 	if (vap)
    313 		vp->v_type = vap->va_type;
    314 
    315 	/*
    316 	 * initialized to the parent's vnodeops.
    317 	 * maybe overwriten for a VDIR
    318 	 */
    319 	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
    320 	vn_exists(vp);
    321 
    322 	dv->sdev_dotdot = NULL;
    323 	dv->sdev_attrvp = NULL;
    324 	if (vap) {
    325 		sdev_attrinit(dv, vap);
    326 	} else {
    327 		dv->sdev_attr = NULL;
    328 	}
    329 
    330 	dv->sdev_ino = sdev_mkino(dv);
    331 	dv->sdev_nlink = 0;		/* updated on insert */
    332 	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
    333 	dv->sdev_flags |= SDEV_BUILD;
    334 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
    335 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
    336 	if (SDEV_IS_GLOBAL(ddv)) {
    337 		dv->sdev_flags |= SDEV_GLOBAL;
    338 		dv->sdev_mapinfo = NULL;
    339 		dhl = &(dv->sdev_handle);
    340 		dhl->dh_data = dv;
    341 		dhl->dh_spec = DEVNAME_NS_NONE;
    342 		dhl->dh_args = NULL;
    343 		sdev_set_no_nocache(dv);
    344 		dv->sdev_gdir_gen = 0;
    345 	} else {
    346 		dv->sdev_flags &= ~SDEV_GLOBAL;
    347 		dv->sdev_origin = NULL; /* set later */
    348 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    349 		dv->sdev_ldir_gen = 0;
    350 		dv->sdev_devtree_gen = 0;
    351 	}
    352 
    353 	rw_enter(&dv->sdev_contents, RW_WRITER);
    354 	sdev_set_nodestate(dv, SDEV_INIT);
    355 	rw_exit(&dv->sdev_contents);
    356 	*newdv = dv;
    357 
    358 	return (0);
    359 }
    360 
    361 /*
    362  * transition a sdev_node into SDEV_READY state
    363  */
    364 int
    365 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    366     void *args, struct cred *cred)
    367 {
    368 	int error = 0;
    369 	struct vnode *vp = SDEVTOV(dv);
    370 	vtype_t type;
    371 
    372 	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
    373 
    374 	type = vap->va_type;
    375 	vp->v_type = type;
    376 	vp->v_rdev = vap->va_rdev;
    377 	rw_enter(&dv->sdev_contents, RW_WRITER);
    378 	if (type == VDIR) {
    379 		dv->sdev_nlink = 2;
    380 		dv->sdev_flags &= ~SDEV_PERSIST;
    381 		dv->sdev_flags &= ~SDEV_DYNAMIC;
    382 		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
    383 		error = sdev_get_moduleops(dv); /* from plug-in module */
    384 		ASSERT(dv->sdev_dotdot);
    385 		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
    386 		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
    387 		avl_create(&dv->sdev_entries,
    388 		    (int (*)(const void *, const void *))sdev_compare_nodes,
    389 		    sizeof (struct sdev_node),
    390 		    offsetof(struct sdev_node, sdev_avllink));
    391 	} else if (type == VLNK) {
    392 		ASSERT(args);
    393 		dv->sdev_nlink = 1;
    394 		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
    395 	} else {
    396 		dv->sdev_nlink = 1;
    397 	}
    398 
    399 	if (!(SDEV_IS_GLOBAL(dv))) {
    400 		dv->sdev_origin = (struct sdev_node *)args;
    401 		dv->sdev_flags &= ~SDEV_PERSIST;
    402 	}
    403 
    404 	/*
    405 	 * shadow node is created here OR
    406 	 * if failed (indicated by dv->sdev_attrvp == NULL),
    407 	 * created later in sdev_setattr
    408 	 */
    409 	if (avp) {
    410 		dv->sdev_attrvp = avp;
    411 	} else {
    412 		if (dv->sdev_attr == NULL)
    413 			sdev_attrinit(dv, vap);
    414 		else
    415 			*dv->sdev_attr = *vap;
    416 
    417 		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
    418 		    ((SDEVTOV(dv)->v_type == VDIR) &&
    419 		    (dv->sdev_attrvp == NULL))) {
    420 			error = sdev_shadow_node(dv, cred);
    421 		}
    422 	}
    423 
    424 	if (error == 0) {
    425 		/* transition to READY state */
    426 		sdev_set_nodestate(dv, SDEV_READY);
    427 		sdev_nc_node_exists(dv);
    428 	} else {
    429 		sdev_set_nodestate(dv, SDEV_ZOMBIE);
    430 	}
    431 	rw_exit(&dv->sdev_contents);
    432 	return (error);
    433 }
    434 
    435 /*
    436  * setting ZOMBIE state
    437  */
    438 static int
    439 sdev_nodezombied(struct sdev_node *dv)
    440 {
    441 	rw_enter(&dv->sdev_contents, RW_WRITER);
    442 	sdev_set_nodestate(dv, SDEV_ZOMBIE);
    443 	rw_exit(&dv->sdev_contents);
    444 	return (0);
    445 }
    446 
    447 /*
    448  * Build the VROOT sdev_node.
    449  */
    450 /*ARGSUSED*/
    451 struct sdev_node *
    452 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    453     struct vnode *avp, struct cred *cred)
    454 {
    455 	struct sdev_node *dv;
    456 	struct vnode *vp;
    457 	char devdir[] = "/dev";
    458 
    459 	ASSERT(sdev_node_cache != NULL);
    460 	ASSERT(avp);
    461 	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
    462 	vp = SDEVTOV(dv);
    463 	vn_reinit(vp);
    464 	vp->v_flag |= VROOT;
    465 	vp->v_vfsp = vfsp;
    466 	vp->v_type = VDIR;
    467 	vp->v_rdev = devdev;
    468 	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
    469 	vn_exists(vp);
    470 
    471 	if (vfsp->vfs_mntpt)
    472 		dv->sdev_name = i_ddi_strdup(
    473 		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
    474 	else
    475 		/* vfs_mountdev1 set mount point later */
    476 		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
    477 	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
    478 	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
    479 	dv->sdev_ino = SDEV_ROOTINO;
    480 	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
    481 	dv->sdev_dotdot = dv;		/* .. == self */
    482 	dv->sdev_attrvp = avp;
    483 	dv->sdev_attr = NULL;
    484 	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
    485 	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
    486 	if (strcmp(dv->sdev_name, "/dev") == 0) {
    487 		mutex_init(&devname_nsmaps_lock, NULL, MUTEX_DEFAULT, NULL);
    488 		dv->sdev_mapinfo = NULL;
    489 		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
    490 		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
    491 		dv->sdev_gdir_gen = 0;
    492 	} else {
    493 		dv->sdev_flags = SDEV_BUILD;
    494 		dv->sdev_flags &= ~SDEV_PERSIST;
    495 		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
    496 		dv->sdev_ldir_gen = 0;
    497 		dv->sdev_devtree_gen = 0;
    498 	}
    499 
    500 	avl_create(&dv->sdev_entries,
    501 	    (int (*)(const void *, const void *))sdev_compare_nodes,
    502 	    sizeof (struct sdev_node),
    503 	    offsetof(struct sdev_node, sdev_avllink));
    504 
    505 	rw_enter(&dv->sdev_contents, RW_WRITER);
    506 	sdev_set_nodestate(dv, SDEV_READY);
    507 	rw_exit(&dv->sdev_contents);
    508 	sdev_nc_node_exists(dv);
    509 	return (dv);
    510 }
    511 
    512 /*
    513  *  1. load the module
    514  *  2. modload invokes sdev_module_register, which in turn sets
    515  *     the dv->sdev_mapinfo->dir_ops
    516  *
    517  * note: locking order:
    518  *	dv->sdev_contents -> map->dir_lock
    519  */
    520 static int
    521 sdev_get_moduleops(struct sdev_node *dv)
    522 {
    523 	int error = 0;
    524 	struct devname_nsmap *map = NULL;
    525 	char *module;
    526 	char *path;
    527 	int load = 1;
    528 
    529 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
    530 
    531 	if (devname_nsmaps == NULL)
    532 		return (0);
    533 
    534 	if (!sdev_nsmaps_loaded() && !sdev_nsmaps_reloaded())
    535 		return (0);
    536 
    537 
    538 	path = dv->sdev_path;
    539 	if ((map = sdev_get_nsmap_by_dir(path, 0))) {
    540 		rw_enter(&map->dir_lock, RW_READER);
    541 		if (map->dir_invalid) {
    542 			if (map->dir_module && map->dir_newmodule &&
    543 			    (strcmp(map->dir_module,
    544 			    map->dir_newmodule) == 0)) {
    545 				load = 0;
    546 			}
    547 			sdev_replace_nsmap(map, map->dir_newmodule,
    548 			    map->dir_newmap);
    549 		}
    550 
    551 		module = map->dir_module;
    552 		if (module && load) {
    553 			sdcmn_err6(("sdev_get_moduleops: "
    554 			    "load module %s", module));
    555 			rw_exit(&map->dir_lock);
    556 			error = modload("devname", module);
    557 			sdcmn_err6(("sdev_get_moduleops: error %d\n", error));
    558 			if (error < 0) {
    559 				return (-1);
    560 			}
    561 		} else if (module == NULL) {
    562 			/*
    563 			 * loading the module ops for name services
    564 			 */
    565 			if (devname_ns_ops == NULL) {
    566 				sdcmn_err6((
    567 				    "sdev_get_moduleops: modload default\n"));
    568 				error = modload("devname", DEVNAME_NSCONFIG);
    569 				sdcmn_err6((
    570 				    "sdev_get_moduleops: error %d\n", error));
    571 				if (error < 0) {
    572 					return (-1);
    573 				}
    574 			}
    575 
    576 			if (!rw_tryupgrade(&map->dir_lock)) {
    577 				rw_exit(&map->dir_lock);
    578 				rw_enter(&map->dir_lock, RW_WRITER);
    579 			}
    580 			ASSERT(devname_ns_ops);
    581 			map->dir_ops = devname_ns_ops;
    582 			rw_exit(&map->dir_lock);
    583 		}
    584 	}
    585 
    586 	dv->sdev_mapinfo = map;
    587 	return (0);
    588 }
    589 
    590 /* directory dependent vop table */
    591 struct sdev_vop_table {
    592 	char *vt_name;				/* subdirectory name */
    593 	const fs_operation_def_t *vt_service;	/* vnodeops table */
    594 	struct vnodeops *vt_vops;		/* constructed vop */
    595 	struct vnodeops **vt_global_vops;	/* global container for vop */
    596 	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
    597 	int vt_flags;
    598 };
    599 
    600 /*
    601  * A nice improvement would be to provide a plug-in mechanism
    602  * for this table instead of a const table.
    603  */
    604 static struct sdev_vop_table vtab[] =
    605 {
    606 	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
    607 	SDEV_DYNAMIC | SDEV_VTOR },
    608 
    609 	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
    610 	SDEV_DYNAMIC | SDEV_VTOR },
    611 
    612 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
    613 
    614 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
    615 	SDEV_DYNAMIC | SDEV_VTOR },
    616 
    617 	{ NULL, NULL, NULL, NULL, NULL, 0}
    618 };
    619 
    620 
    621 /*
    622  *  sets a directory's vnodeops if the directory is in the vtab;
    623  */
    624 static struct vnodeops *
    625 sdev_get_vop(struct sdev_node *dv)
    626 {
    627 	int i;
    628 	char *path;
    629 
    630 	path = dv->sdev_path;
    631 	ASSERT(path);
    632 
    633 	/* gets the relative path to /dev/ */
    634 	path += 5;
    635 
    636 	/* gets the vtab entry if matches */
    637 	for (i = 0; vtab[i].vt_name; i++) {
    638 		if (strcmp(vtab[i].vt_name, path) != 0)
    639 			continue;
    640 		dv->sdev_flags |= vtab[i].vt_flags;
    641 
    642 		if (vtab[i].vt_vops) {
    643 			if (vtab[i].vt_global_vops)
    644 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
    645 			return (vtab[i].vt_vops);
    646 		}
    647 
    648 		if (vtab[i].vt_service) {
    649 			fs_operation_def_t *templ;
    650 			templ = sdev_merge_vtab(vtab[i].vt_service);
    651 			if (vn_make_ops(vtab[i].vt_name,
    652 			    (const fs_operation_def_t *)templ,
    653 			    &vtab[i].vt_vops) != 0) {
    654 				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
    655 				    vtab[i].vt_name);
    656 				/*NOTREACHED*/
    657 			}
    658 			if (vtab[i].vt_global_vops) {
    659 				*(vtab[i].vt_global_vops) = vtab[i].vt_vops;
    660 			}
    661 			sdev_free_vtab(templ);
    662 			return (vtab[i].vt_vops);
    663 		}
    664 		return (sdev_vnodeops);
    665 	}
    666 
    667 	/* child inherits the persistence of the parent */
    668 	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
    669 		dv->sdev_flags |= SDEV_PERSIST;
    670 
    671 	return (sdev_vnodeops);
    672 }
    673 
    674 static void
    675 sdev_set_no_nocache(struct sdev_node *dv)
    676 {
    677 	int i;
    678 	char *path;
    679 
    680 	ASSERT(dv->sdev_path);
    681 	path = dv->sdev_path + strlen("/dev/");
    682 
    683 	for (i = 0; vtab[i].vt_name; i++) {
    684 		if (strcmp(vtab[i].vt_name, path) == 0) {
    685 			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
    686 				dv->sdev_flags |= SDEV_NO_NCACHE;
    687 			break;
    688 		}
    689 	}
    690 }
    691 
    692 void *
    693 sdev_get_vtor(struct sdev_node *dv)
    694 {
    695 	int i;
    696 
    697 	for (i = 0; vtab[i].vt_name; i++) {
    698 		if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0)
    699 			continue;
    700 		return ((void *)vtab[i].vt_vtor);
    701 	}
    702 	return (NULL);
    703 }
    704 
    705 /*
    706  * Build the base root inode
    707  */
    708 ino_t
    709 sdev_mkino(struct sdev_node *dv)
    710 {
    711 	ino_t	ino;
    712 
    713 	/*
    714 	 * for now, follow the lead of tmpfs here
    715 	 * need to someday understand the requirements here
    716 	 */
    717 	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
    718 	ino += SDEV_ROOTINO + 1;
    719 
    720 	return (ino);
    721 }
    722 
    723 static int
    724 sdev_getlink(struct vnode *linkvp, char **link)
    725 {
    726 	int err;
    727 	char *buf;
    728 	struct uio uio = {0};
    729 	struct iovec iov = {0};
    730 
    731 	if (linkvp == NULL)
    732 		return (ENOENT);
    733 	ASSERT(linkvp->v_type == VLNK);
    734 
    735 	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
    736 	iov.iov_base = buf;
    737 	iov.iov_len = MAXPATHLEN;
    738 	uio.uio_iov = &iov;
    739 	uio.uio_iovcnt = 1;
    740 	uio.uio_resid = MAXPATHLEN;
    741 	uio.uio_segflg = UIO_SYSSPACE;
    742 	uio.uio_llimit = MAXOFFSET_T;
    743 
    744 	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
    745 	if (err) {
    746 		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
    747 		kmem_free(buf, MAXPATHLEN);
    748 		return (ENOENT);
    749 	}
    750 
    751 	/* mission complete */
    752 	*link = i_ddi_strdup(buf, KM_SLEEP);
    753 	kmem_free(buf, MAXPATHLEN);
    754 	return (0);
    755 }
    756 
    757 /*
    758  * A convenient wrapper to get the devfs node vnode for a device
    759  * minor functionality: readlink() of a /dev symlink
    760  * Place the link into dv->sdev_symlink
    761  */
    762 static int
    763 sdev_follow_link(struct sdev_node *dv)
    764 {
    765 	int err;
    766 	struct vnode *linkvp;
    767 	char *link = NULL;
    768 
    769 	linkvp = SDEVTOV(dv);
    770 	if (linkvp == NULL)
    771 		return (ENOENT);
    772 	ASSERT(linkvp->v_type == VLNK);
    773 	err = sdev_getlink(linkvp, &link);
    774 	if (err) {
    775 		(void) sdev_nodezombied(dv);
    776 		dv->sdev_symlink = NULL;
    777 		return (ENOENT);
    778 	}
    779 
    780 	ASSERT(link != NULL);
    781 	dv->sdev_symlink = link;
    782 	return (0);
    783 }
    784 
    785 static int
    786 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
    787 {
    788 	vtype_t otype = SDEVTOV(dv)->v_type;
    789 
    790 	/*
    791 	 * existing sdev_node has a different type.
    792 	 */
    793 	if (otype != nvap->va_type) {
    794 		sdcmn_err9(("sdev_node_check: existing node "
    795 		    "  %s type %d does not match new node type %d\n",
    796 		    dv->sdev_name, otype, nvap->va_type));
    797 		return (EEXIST);
    798 	}
    799 
    800 	/*
    801 	 * For a symlink, the target should be the same.
    802 	 */
    803 	if (otype == VLNK) {
    804 		ASSERT(nargs != NULL);
    805 		ASSERT(dv->sdev_symlink != NULL);
    806 		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
    807 			sdcmn_err9(("sdev_node_check: existing node "
    808 			    " %s has different symlink %s as new node "
    809 			    " %s\n", dv->sdev_name, dv->sdev_symlink,
    810 			    (char *)nargs));
    811 			return (EEXIST);
    812 		}
    813 	}
    814 
    815 	return (0);
    816 }
    817 
    818 /*
    819  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
    820  *
    821  * arguments:
    822  *	- ddv (parent)
    823  *	- nm (child name)
    824  *	- newdv (sdev_node for nm is returned here)
    825  *	- vap (vattr for the node to be created, va_type should be set.
    826  *	- avp (attribute vnode)
    827  *	  the defaults should be used if unknown)
    828  *	- cred
    829  *	- args
    830  *	    . tnm (for VLNK)
    831  *	    . global sdev_node (for !SDEV_GLOBAL)
    832  * 	- state: SDEV_INIT, SDEV_READY
    833  *
    834  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
    835  *
    836  * NOTE:  directory contents writers lock needs to be held before
    837  *	  calling this routine.
    838  */
    839 int
    840 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    841     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    842     sdev_node_state_t state)
    843 {
    844 	int error = 0;
    845 	sdev_node_state_t node_state;
    846 	struct sdev_node *dv = NULL;
    847 
    848 	ASSERT(state != SDEV_ZOMBIE);
    849 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
    850 
    851 	if (*newdv) {
    852 		dv = *newdv;
    853 	} else {
    854 		/* allocate and initialize a sdev_node */
    855 		if (ddv->sdev_state == SDEV_ZOMBIE) {
    856 			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
    857 			    ddv->sdev_path));
    858 			return (ENOENT);
    859 		}
    860 
    861 		error = sdev_nodeinit(ddv, nm, &dv, vap);
    862 		if (error != 0) {
    863 			sdcmn_err9(("sdev_mknode: error %d,"
    864 			    " name %s can not be initialized\n",
    865 			    error, nm));
    866 			return (error);
    867 		}
    868 		ASSERT(dv);
    869 
    870 		/* insert into the directory cache */
    871 		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
    872 		if (error) {
    873 			sdcmn_err9(("sdev_mknode: node %s can not"
    874 			    " be added into directory cache\n", nm));
    875 			return (ENOENT);
    876 		}
    877 	}
    878 
    879 	ASSERT(dv);
    880 	node_state = dv->sdev_state;
    881 	ASSERT(node_state != SDEV_ZOMBIE);
    882 
    883 	if (state == SDEV_READY) {
    884 		switch (node_state) {
    885 		case SDEV_INIT:
    886 			error = sdev_nodeready(dv, vap, avp, args, cred);
    887 			if (error) {
    888 				sdcmn_err9(("sdev_mknode: node %s can NOT"
    889 				    " be transitioned into READY state, "
    890 				    "error %d\n", nm, error));
    891 			}
    892 			break;
    893 		case SDEV_READY:
    894 			/*
    895 			 * Do some sanity checking to make sure
    896 			 * the existing sdev_node is what has been
    897 			 * asked for.
    898 			 */
    899 			error = sdev_node_check(dv, vap, args);
    900 			break;
    901 		default:
    902 			break;
    903 		}
    904 	}
    905 
    906 	if (!error) {
    907 		*newdv = dv;
    908 		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
    909 	} else {
    910 		SDEV_SIMPLE_RELE(dv);
    911 		*newdv = NULL;
    912 	}
    913 
    914 	return (error);
    915 }
    916 
    917 /*
    918  * convenient wrapper to change vp's ATIME, CTIME and MTIME
    919  */
    920 void
    921 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
    922 {
    923 	struct vattr attr;
    924 	timestruc_t now;
    925 	int err;
    926 
    927 	ASSERT(vp);
    928 	gethrestime(&now);
    929 	if (mask & AT_CTIME)
    930 		attr.va_ctime = now;
    931 	if (mask & AT_MTIME)
    932 		attr.va_mtime = now;
    933 	if (mask & AT_ATIME)
    934 		attr.va_atime = now;
    935 
    936 	attr.va_mask = (mask & AT_TIMES);
    937 	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
    938 	if (err && (err != EROFS)) {
    939 		sdcmn_err(("update timestamps error %d\n", err));
    940 	}
    941 }
    942 
    943 /*
    944  * the backing store vnode is released here
    945  */
    946 /*ARGSUSED1*/
    947 void
    948 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
    949 {
    950 	/* no references */
    951 	ASSERT(dv->sdev_nlink == 0);
    952 
    953 	if (dv->sdev_attrvp != NULLVP) {
    954 		VN_RELE(dv->sdev_attrvp);
    955 		/*
    956 		 * reset the attrvp so that no more
    957 		 * references can be made on this already
    958 		 * vn_rele() vnode
    959 		 */
    960 		dv->sdev_attrvp = NULLVP;
    961 	}
    962 
    963 	if (dv->sdev_attr != NULL) {
    964 		kmem_free(dv->sdev_attr, sizeof (struct vattr));
    965 		dv->sdev_attr = NULL;
    966 	}
    967 
    968 	if (dv->sdev_name != NULL) {
    969 		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
    970 		dv->sdev_name = NULL;
    971 	}
    972 
    973 	if (dv->sdev_symlink != NULL) {
    974 		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
    975 		dv->sdev_symlink = NULL;
    976 	}
    977 
    978 	if (dv->sdev_path) {
    979 		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
    980 		dv->sdev_path = NULL;
    981 	}
    982 
    983 	if (!SDEV_IS_GLOBAL(dv))
    984 		sdev_prof_free(dv);
    985 
    986 	if (SDEVTOV(dv)->v_type == VDIR) {
    987 		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
    988 		avl_destroy(&dv->sdev_entries);
    989 	}
    990 
    991 	mutex_destroy(&dv->sdev_lookup_lock);
    992 	cv_destroy(&dv->sdev_lookup_cv);
    993 
    994 	/* return node to initial state as per constructor */
    995 	(void) memset((void *)&dv->sdev_instance_data, 0,
    996 	    sizeof (dv->sdev_instance_data));
    997 	vn_invalid(SDEVTOV(dv));
    998 	kmem_cache_free(sdev_node_cache, dv);
    999 }
   1000 
   1001 /*
   1002  * DIRECTORY CACHE lookup
   1003  */
   1004 struct sdev_node *
   1005 sdev_findbyname(struct sdev_node *ddv, char *nm)
   1006 {
   1007 	struct sdev_node *dv;
   1008 	struct sdev_node dvtmp;
   1009 	avl_index_t	where;
   1010 
   1011 	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
   1012 
   1013 	dvtmp.sdev_name = nm;
   1014 	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
   1015 	if (dv) {
   1016 		ASSERT(dv->sdev_dotdot == ddv);
   1017 		ASSERT(strcmp(dv->sdev_name, nm) == 0);
   1018 		SDEV_HOLD(dv);
   1019 		return (dv);
   1020 	}
   1021 	return (NULL);
   1022 }
   1023 
   1024 /*
   1025  * Inserts a new sdev_node in a parent directory
   1026  */
   1027 void
   1028 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
   1029 {
   1030 	avl_index_t where;
   1031 
   1032 	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
   1033 	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
   1034 	ASSERT(ddv->sdev_nlink >= 2);
   1035 	ASSERT(dv->sdev_nlink == 0);
   1036 
   1037 	dv->sdev_dotdot = ddv;
   1038 	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
   1039 	avl_insert(&