Home | History | Annotate | Download | only in specfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 /*
     30  * University Copyright- Copyright (c) 1982, 1986, 1988
     31  * The Regents of the University of California
     32  * All Rights Reserved
     33  *
     34  * University Acknowledgment- Portions of this document are derived from
     35  * software developed by the University of California, Berkeley, and its
     36  * contributors.
     37  */
     38 
     39 
     40 #include <sys/types.h>
     41 #include <sys/t_lock.h>
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/buf.h>
     45 #include <sys/conf.h>
     46 #include <sys/cred.h>
     47 #include <sys/kmem.h>
     48 #include <sys/sysmacros.h>
     49 #include <sys/vfs.h>
     50 #include <sys/vfs_opreg.h>
     51 #include <sys/vnode.h>
     52 #include <sys/fs/snode.h>
     53 #include <sys/fs/fifonode.h>
     54 #include <sys/debug.h>
     55 #include <sys/errno.h>
     56 #include <sys/time.h>
     57 #include <sys/file.h>
     58 #include <sys/open.h>
     59 #include <sys/user.h>
     60 #include <sys/termios.h>
     61 #include <sys/stream.h>
     62 #include <sys/strsubr.h>
     63 #include <sys/autoconf.h>
     64 #include <sys/esunddi.h>
     65 #include <sys/flock.h>
     66 #include <sys/modctl.h>
     67 
     68 struct vfs spec_vfs;
     69 static dev_t specdev;
     70 struct kmem_cache *snode_cache;
     71 int spec_debug = 0;
     72 
     73 static struct snode *sfind(dev_t, vtype_t, struct vnode *);
     74 static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
     75 static void sinsert(struct snode *);
     76 
     77 struct vnode *
     78 specvp_devfs(
     79 	struct vnode	*realvp,
     80 	dev_t		dev,
     81 	vtype_t		vtyp,
     82 	struct cred	*cr,
     83 	dev_info_t	*dip)
     84 {
     85 	struct vnode	*vp;
     86 
     87 	ASSERT(realvp && dip);
     88 	vp = specvp(realvp, dev, vtyp, cr);
     89 	ASSERT(vp);
     90 
     91 	/* associate a dip hold with the common snode's s_dip pointer */
     92 	spec_assoc_vp_with_devi(vp, dip);
     93 	return (vp);
     94 }
     95 
     96 /*
     97  * Return a shadow special vnode for the given dev.
     98  * If no snode exists for this dev create one and put it
     99  * in a table hashed by <dev, realvp>.  If the snode for
    100  * this dev is already in the table return it (ref count is
    101  * incremented by sfind).  The snode will be flushed from the
    102  * table when spec_inactive calls sdelete.
    103  *
    104  * The fsid is inherited from the real vnode so that clones
    105  * can be found.
    106  *
    107  */
    108 struct vnode *
    109 specvp(
    110 	struct vnode	*vp,
    111 	dev_t		dev,
    112 	vtype_t		type,
    113 	struct cred	*cr)
    114 {
    115 	struct snode *sp;
    116 	struct snode *nsp;
    117 	struct snode *csp;
    118 	struct vnode *svp;
    119 	struct vattr va;
    120 	int	rc;
    121 	int	used_csp = 0;		/* Did we use pre-allocated csp */
    122 
    123 	if (vp == NULL)
    124 		return (NULL);
    125 	if (vp->v_type == VFIFO)
    126 		return (fifovp(vp, cr));
    127 
    128 	ASSERT(vp->v_type == type);
    129 	ASSERT(vp->v_rdev == dev);
    130 
    131 	/*
    132 	 * Pre-allocate snodes before holding any locks in case we block
    133 	 */
    134 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
    135 	csp = kmem_cache_alloc(snode_cache, KM_SLEEP);
    136 
    137 	/*
    138 	 * Get the time attributes outside of the stable lock since
    139 	 * this operation may block. Unfortunately, it may not have
    140 	 * been required if the snode is in the cache.
    141 	 */
    142 	va.va_mask = AT_FSID | AT_TIMES;
    143 	rc = VOP_GETATTR(vp, &va, 0, cr, NULL);	/* XXX may block! */
    144 
    145 	mutex_enter(&stable_lock);
    146 	if ((sp = sfind(dev, type, vp)) == NULL) {
    147 		struct vnode *cvp;
    148 
    149 		sp = nsp;	/* Use pre-allocated snode */
    150 		svp = STOV(sp);
    151 
    152 		sp->s_realvp	= vp;
    153 		VN_HOLD(vp);
    154 		sp->s_commonvp	= NULL;
    155 		sp->s_dev	= dev;
    156 		sp->s_dip	= NULL;
    157 		sp->s_nextr	= NULL;
    158 		sp->s_list	= NULL;
    159 		sp->s_plcy	= NULL;
    160 		sp->s_size	= 0;
    161 		sp->s_flag	= 0;
    162 		if (rc == 0) {
    163 			/*
    164 			 * Set times in snode to those in the vnode.
    165 			 */
    166 			sp->s_fsid = va.va_fsid;
    167 			sp->s_atime = va.va_atime.tv_sec;
    168 			sp->s_mtime = va.va_mtime.tv_sec;
    169 			sp->s_ctime = va.va_ctime.tv_sec;
    170 		} else {
    171 			sp->s_fsid = specdev;
    172 			sp->s_atime = 0;
    173 			sp->s_mtime = 0;
    174 			sp->s_ctime = 0;
    175 		}
    176 		sp->s_count	= 0;
    177 		sp->s_mapcnt	= 0;
    178 
    179 		vn_reinit(svp);
    180 		svp->v_flag	= (vp->v_flag & VROOT);
    181 		svp->v_vfsp	= vp->v_vfsp;
    182 		VFS_HOLD(svp->v_vfsp);
    183 		svp->v_type	= type;
    184 		svp->v_rdev	= dev;
    185 		(void) vn_copypath(vp, svp);
    186 		if (type == VBLK || type == VCHR) {
    187 			cvp = get_cvp(dev, type, csp, &used_csp);
    188 			svp->v_stream = cvp->v_stream;
    189 
    190 			sp->s_commonvp = cvp;
    191 		}
    192 		vn_exists(svp);
    193 		sinsert(sp);
    194 		mutex_exit(&stable_lock);
    195 		if (used_csp == 0) {
    196 			/* Didn't use pre-allocated snode so free it */
    197 			kmem_cache_free(snode_cache, csp);
    198 		}
    199 	} else {
    200 		mutex_exit(&stable_lock);
    201 		/* free unused snode memory */
    202 		kmem_cache_free(snode_cache, nsp);
    203 		kmem_cache_free(snode_cache, csp);
    204 	}
    205 	return (STOV(sp));
    206 }
    207 
    208 /*
    209  * Return a special vnode for the given dev; no vnode is supplied
    210  * for it to shadow.  Always create a new snode and put it in the
    211  * table hashed by <dev, NULL>.  The snode will be flushed from the
    212  * table when spec_inactive() calls sdelete().  The association of
    213  * this node with a attached instance of hardware is not made until
    214  * spec_open time.
    215  *
    216  * N.B. Assumes caller takes on responsibility of making sure no one
    217  * else is creating a snode for (dev, type) at this time.
    218  */
    219 struct vnode *
    220 makespecvp(dev_t dev, vtype_t type)
    221 {
    222 	struct snode *sp;
    223 	struct vnode *svp, *cvp;
    224 	time_t now;
    225 
    226 	sp = kmem_cache_alloc(snode_cache, KM_SLEEP);
    227 	svp = STOV(sp);
    228 	cvp = commonvp(dev, type);
    229 	now = gethrestime_sec();
    230 
    231 	sp->s_realvp	= NULL;
    232 	sp->s_commonvp	= cvp;
    233 	sp->s_dev	= dev;
    234 	sp->s_dip	= NULL;
    235 	sp->s_nextr	= NULL;
    236 	sp->s_list	= NULL;
    237 	sp->s_plcy	= NULL;
    238 	sp->s_size	= 0;
    239 	sp->s_flag	= 0;
    240 	sp->s_fsid	= specdev;
    241 	sp->s_atime	= now;
    242 	sp->s_mtime	= now;
    243 	sp->s_ctime	= now;
    244 	sp->s_count	= 0;
    245 	sp->s_mapcnt	= 0;
    246 
    247 	vn_reinit(svp);
    248 	svp->v_vfsp	= &spec_vfs;
    249 	svp->v_stream	= cvp->v_stream;
    250 	svp->v_type	= type;
    251 	svp->v_rdev	= dev;
    252 
    253 	vn_exists(svp);
    254 	mutex_enter(&stable_lock);
    255 	sinsert(sp);
    256 	mutex_exit(&stable_lock);
    257 
    258 	return (svp);
    259 }
    260 
    261 
    262 /*
    263  * This function is called from spec_assoc_vp_with_devi(). That function
    264  * associates a "new" dip with a common snode, releasing (any) old dip
    265  * in the process. This function (spec_assoc_fence()) looks at the "new dip"
    266  * and determines whether the snode should be fenced of or not. As the table
    267  * below indicates, the value of old-dip is a don't care for all cases.
    268  *
    269  * old-dip	new-dip		common-snode
    270  * =========================================
    271  * Don't care	NULL		unfence
    272  * Don't care	retired		fence
    273  * Don't care	not-retired	unfence
    274  *
    275  * Since old-dip value is a "don't care", it is not passed into this function.
    276  */
    277 static void
    278 spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
    279 {
    280 	int		fence;
    281 	struct snode	*csp;
    282 
    283 	ASSERT(vp);
    284 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
    285 
    286 	fence = 0;
    287 	if (ndip != NULL) {
    288 		mutex_enter(&DEVI(ndip)->devi_lock);
    289 		if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
    290 			fence = 1;
    291 		mutex_exit(&DEVI(ndip)->devi_lock);
    292 	}
    293 
    294 	csp = VTOCS(vp);
    295 	ASSERT(csp);
    296 
    297 	/* SFENCED flag only set on common snode */
    298 	mutex_enter(&csp->s_lock);
    299 	if (fence)
    300 		csp->s_flag |= SFENCED;
    301 	else
    302 		csp->s_flag &= ~SFENCED;
    303 	mutex_exit(&csp->s_lock);
    304 
    305 	FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
    306 	    fence ? "" : "un", (void *)csp, (void *)ndip));
    307 }
    308 
    309 /*
    310  * Associate the common snode with a devinfo node.  This is called from:
    311  *
    312  *   1) specvp_devfs to associate a specfs node with the dip attached
    313  *	by devfs.
    314  *
    315  *   2) spec_open after path reconstruction and attach.
    316  *
    317  *   3) From dacf processing to associate a makespecvp node with
    318  *	the dip that dacf postattach processing is being performed on.
    319  *	This association is made prior to open to avoid recursion issues.
    320  *
    321  *   4) From ddi_assoc_queue_with_devi to change vnode association as part of
    322  *	DL_ATTACH/DL_DETACH processing (SDIPSET already set).  The call
    323  *	from ddi_assoc_queue_with_devi may specify a NULL dip.
    324  *
    325  * We put an extra hold on the devinfo node passed in as we establish it as
    326  * the new s_dip pointer.  Any hold associated with the prior s_dip pointer
    327  * is released. The new hold will stay active until another call to
    328  * spec_assoc_vp_with_devi or until the common snode is destroyed by
    329  * spec_inactive after the last VN_RELE of the common node. This devinfo hold
    330  * transfers across a clone open except in the clone_dev case, where the clone
    331  * driver is no longer required after open.
    332  *
    333  * When SDIPSET is set and s_dip is NULL, the vnode has an association with
    334  * the driver even though there is currently no association with a specific
    335  * hardware instance.
    336  */
    337 void
    338 spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
    339 {
    340 	struct snode	*csp;
    341 	dev_info_t	*olddip;
    342 
    343 	ASSERT(vp);
    344 
    345 	/*
    346 	 * Don't establish a NULL association for a vnode associated with the
    347 	 * clone driver.  The qassociate(, -1) call from a streams driver's
    348 	 * open implementation to indicate support for qassociate has the
    349 	 * side-effect of this type of spec_assoc_vp_with_devi call. This
    350 	 * call should not change the the association of the pre-clone
    351 	 * vnode associated with the clone driver, the post-clone newdev
    352 	 * association will be established later by spec_clone().
    353 	 */
    354 	if ((dip == NULL) && (getmajor(vp->v_rdev) == clone_major))
    355 		return;
    356 
    357 	/* hold the new */
    358 	if (dip)
    359 		e_ddi_hold_devi(dip);
    360 
    361 	csp = VTOS(VTOS(vp)->s_commonvp);
    362 	mutex_enter(&csp->s_lock);
    363 	olddip = csp->s_dip;
    364 	csp->s_dip = dip;
    365 	csp->s_flag |= SDIPSET;
    366 
    367 	/* If association changes then invalidate cached size */
    368 	if (olddip != dip)
    369 		csp->s_flag &= ~SSIZEVALID;
    370 	mutex_exit(&csp->s_lock);
    371 
    372 	spec_assoc_fence(dip, vp);
    373 
    374 	/* release the old */
    375 	if (olddip)
    376 		ddi_release_devi(olddip);
    377 }
    378 
    379 /*
    380  * Return the held dip associated with the specified snode.
    381  */
    382 dev_info_t *
    383 spec_hold_devi_by_vp(struct vnode *vp)
    384 {
    385 	struct snode	*csp;
    386 	dev_info_t	*dip;
    387 
    388 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
    389 
    390 	csp = VTOS(VTOS(vp)->s_commonvp);
    391 	dip = csp->s_dip;
    392 	if (dip)
    393 		e_ddi_hold_devi(dip);
    394 	return (dip);
    395 }
    396 
    397 /*
    398  * Find a special vnode that refers to the given device
    399  * of the given type.  Never return a "common" vnode.
    400  * Return NULL if a special vnode does not exist.
    401  * HOLD the vnode before returning it.
    402  */
    403 struct vnode *
    404 specfind(dev_t dev, vtype_t type)
    405 {
    406 	struct snode *st;
    407 	struct vnode *nvp;
    408 
    409 	mutex_enter(&stable_lock);
    410 	st = stable[STABLEHASH(dev)];
    411 	while (st != NULL) {
    412 		if (st->s_dev == dev) {
    413 			nvp = STOV(st);
    414 			if (nvp->v_type == type && st->s_commonvp != nvp) {
    415 				VN_HOLD(nvp);
    416 				mutex_exit(&stable_lock);
    417 				return (nvp);
    418 			}
    419 		}
    420 		st = st->s_next;
    421 	}
    422 	mutex_exit(&stable_lock);
    423 	return (NULL);
    424 }
    425 
    426 /*
    427  * Loop through the snode cache looking for snodes referencing dip.
    428  *
    429  * This function determines if a devinfo node is "BUSY" from the perspective
    430  * of having an active vnode associated with the device, which represents a
    431  * dependency on the device's services.  This function is needed because a
    432  * devinfo node can have a non-zero devi_ref and still NOT be "BUSY" when,
    433  * for instance, the framework is manipulating the node (has an open
    434  * ndi_hold_devi).
    435  *
    436  * Returns:
    437  *	DEVI_REFERENCED		- if dip is referenced
    438  *	DEVI_NOT_REFERENCED	- if dip is not referenced
    439  */
    440 int
    441 devi_stillreferenced(dev_info_t *dip)
    442 {
    443 	struct snode	*sp;
    444 	int		i;
    445 
    446 	/* if no hold then there can't be an snode with s_dip == dip */
    447 	if (e_ddi_devi_holdcnt(dip) == 0)
    448 		return (DEVI_NOT_REFERENCED);
    449 
    450 	mutex_enter(&stable_lock);
    451 	for (i = 0; i < STABLESIZE; i++) {
    452 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
    453 			if (sp->s_dip == dip) {
    454 				mutex_exit(&stable_lock);
    455 				return (DEVI_REFERENCED);
    456 			}
    457 		}
    458 	}
    459 	mutex_exit(&stable_lock);
    460 	return (DEVI_NOT_REFERENCED);
    461 }
    462 
    463 /*
    464  * Given an snode, returns the open count and the dip
    465  * associated with that snode
    466  * Assumes the caller holds the appropriate locks
    467  * to prevent snode and/or dip from going away.
    468  * Returns:
    469  *	-1	No associated dip
    470  *	>= 0	Number of opens.
    471  */
    472 int
    473 spec_devi_open_count(struct snode *sp, dev_info_t **dipp)
    474 {
    475 	dev_info_t *dip;
    476 	uint_t count;
    477 	struct vnode *vp;
    478 
    479 	ASSERT(sp);
    480 	ASSERT(dipp);
    481 
    482 	vp = STOV(sp);
    483 
    484 	*dipp = NULL;
    485 
    486 	/*
    487 	 * We are only interested in common snodes. Only common snodes
    488 	 * get their s_count fields bumped up on opens.
    489 	 */
    490 	if (sp->s_commonvp != vp || (dip = sp->s_dip) == NULL)
    491 		return (-1);
    492 
    493 	mutex_enter(&sp->s_lock);
    494 	count = sp->s_count + sp->s_mapcnt;
    495 	if (sp->s_flag & SLOCKED)
    496 		count++;
    497 	mutex_exit(&sp->s_lock);
    498 
    499 	*dipp = dip;
    500 
    501 	return (count);
    502 }
    503 
    504 /*
    505  * Given a device vnode, return the common
    506  * vnode associated with it.
    507  */
    508 struct vnode *
    509 common_specvp(struct vnode *vp)
    510 {
    511 	struct snode *sp;
    512 
    513 	if ((vp->v_type != VBLK) && (vp->v_type != VCHR) ||
    514 	    !vn_matchops(vp, spec_getvnodeops()))
    515 		return (vp);
    516 	sp = VTOS(vp);
    517 	return (sp->s_commonvp);
    518 }
    519 
    520 /*
    521  * Returns a special vnode for the given dev.  The vnode is the
    522  * one which is "common" to all the snodes which represent the
    523  * same device.
    524  * Similar to commonvp() but doesn't acquire the stable_lock, and
    525  * may use a pre-allocated snode provided by caller.
    526  */
    527 static struct vnode *
    528 get_cvp(
    529 	dev_t		dev,
    530 	vtype_t		type,
    531 	struct snode	*nsp,		/* pre-allocated snode */
    532 	int		*used_nsp)	/* flag indicating if we use nsp */
    533 {
    534 	struct snode *sp;
    535 	struct vnode *svp;
    536 
    537 	ASSERT(MUTEX_HELD(&stable_lock));
    538 	if ((sp = sfind(dev, type, NULL)) == NULL) {
    539 		sp = nsp;		/* Use pre-allocated snode */
    540 		*used_nsp = 1;		/* return value */
    541 		svp = STOV(sp);
    542 
    543 		sp->s_realvp	= NULL;
    544 		sp->s_commonvp	= svp;		/* points to itself */
    545 		sp->s_dev	= dev;
    546 		sp->s_dip	= NULL;
    547 		sp->s_nextr	= NULL;
    548 		sp->s_list	= NULL;
    549 		sp->s_plcy	= NULL;
    550 		sp->s_size	= UNKNOWN_SIZE;
    551 		sp->s_flag	= 0;
    552 		sp->s_fsid	= specdev;
    553 		sp->s_atime	= 0;
    554 		sp->s_mtime	= 0;
    555 		sp->s_ctime	= 0;
    556 		sp->s_count	= 0;
    557 		sp->s_mapcnt	= 0;
    558 
    559 		vn_reinit(svp);
    560 		svp->v_vfsp	= &spec_vfs;
    561 		svp->v_type	= type;
    562 		svp->v_rdev	= dev;
    563 		vn_exists(svp);
    564 		sinsert(sp);
    565 	} else
    566 		*used_nsp = 0;
    567 	return (STOV(sp));
    568 }
    569 
    570 /*
    571  * Returns a special vnode for the given dev.  The vnode is the
    572  * one which is "common" to all the snodes which represent the
    573  * same device.  For use ONLY by SPECFS.
    574  */
    575 struct vnode *
    576 commonvp(dev_t dev, vtype_t type)
    577 {
    578 	struct snode *sp, *nsp;
    579 	struct vnode *svp;
    580 
    581 	/* Pre-allocate snode in case we might block */
    582 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
    583 
    584 	mutex_enter(&stable_lock);
    585 	if ((sp = sfind(dev, type, NULL)) == NULL) {
    586 		sp = nsp;		/* Use pre-alloced snode */
    587 		svp = STOV(sp);
    588 
    589 		sp->s_realvp	= NULL;
    590 		sp->s_commonvp	= svp;		/* points to itself */
    591 		sp->s_dev	= dev;
    592 		sp->s_dip	= NULL;
    593 		sp->s_nextr	= NULL;
    594 		sp->s_list	= NULL;
    595 		sp->s_plcy	= NULL;
    596 		sp->s_size	= UNKNOWN_SIZE;
    597 		sp->s_flag	= 0;
    598 		sp->s_fsid	= specdev;
    599 		sp->s_atime	= 0;
    600 		sp->s_mtime	= 0;
    601 		sp->s_ctime	= 0;
    602 		sp->s_count	= 0;
    603 		sp->s_mapcnt	= 0;
    604 
    605 		vn_reinit(svp);
    606 		svp->v_vfsp	= &spec_vfs;
    607 		svp->v_type	= type;
    608 		svp->v_rdev	= dev;
    609 		vn_exists(svp);
    610 		sinsert(sp);
    611 		mutex_exit(&stable_lock);
    612 	} else {
    613 		mutex_exit(&stable_lock);
    614 		/* Didn't need the pre-allocated snode */
    615 		kmem_cache_free(snode_cache, nsp);
    616 	}
    617 	return (STOV(sp));
    618 }
    619 
    620 /*
    621  * Snode lookup stuff.
    622  * These routines maintain a table of snodes hashed by dev so
    623  * that the snode for an dev can be found if it already exists.
    624  */
    625 struct snode *stable[STABLESIZE];
    626 int		stablesz = STABLESIZE;
    627 kmutex_t	stable_lock;
    628 
    629 /*
    630  * Put a snode in the table.
    631  */
    632 static void
    633 sinsert(struct snode *sp)
    634 {
    635 	ASSERT(MUTEX_HELD(&stable_lock));
    636 	sp->s_next = stable[STABLEHASH(sp->s_dev)];
    637 	stable[STABLEHASH(sp->s_dev)] = sp;
    638 }
    639 
    640 /*
    641  * Remove an snode from the hash table.
    642  * The realvp is not released here because spec_inactive() still
    643  * needs it to do a spec_fsync().
    644  */
    645 void
    646 sdelete(struct snode *sp)
    647 {
    648 	struct snode *st;
    649 	struct snode *stprev = NULL;
    650 
    651 	ASSERT(MUTEX_HELD(&stable_lock));
    652 	st = stable[STABLEHASH(sp->s_dev)];
    653 	while (st != NULL) {
    654 		if (st == sp) {
    655 			if (stprev == NULL)
    656 				stable[STABLEHASH(sp->s_dev)] = st->s_next;
    657 			else
    658 				stprev->s_next = st->s_next;
    659 			break;
    660 		}
    661 		stprev = st;
    662 		st = st->s_next;
    663 	}
    664 }
    665 
    666 /*
    667  * Lookup an snode by <dev, type, vp>.
    668  * ONLY looks for snodes with non-NULL s_realvp members and
    669  * common snodes (with s_commonvp pointing to its vnode).
    670  *
    671  * If vp is NULL, only return commonvp. Otherwise return
    672  * shadow vp with both shadow and common vp's VN_HELD.
    673  */
    674 static struct snode *
    675 sfind(
    676 	dev_t	dev,
    677 	vtype_t	type,
    678 	struct vnode *vp)
    679 {
    680 	struct snode *st;
    681 	struct vnode *svp;
    682 
    683 	ASSERT(MUTEX_HELD(&stable_lock));
    684 	st = stable[STABLEHASH(dev)];
    685 	while (st != NULL) {
    686 		svp = STOV(st);
    687 		if (st->s_dev == dev && svp->v_type == type &&
    688 		    VN_CMP(st->s_realvp, vp) &&
    689 		    (vp != NULL || st->s_commonvp == svp) &&
    690 		    (vp == NULL || st->s_realvp->v_vfsp == vp->v_vfsp)) {
    691 			VN_HOLD(svp);
    692 			return (st);
    693 		}
    694 		st = st->s_next;
    695 	}
    696 	return (NULL);
    697 }
    698 
    699 /*
    700  * Mark the accessed, updated, or changed times in an snode
    701  * with the current time.
    702  */
    703 void
    704 smark(struct snode *sp, int flag)
    705 {
    706 	time_t	now = gethrestime_sec();
    707 
    708 	/* check for change to avoid unnecessary locking */
    709 	ASSERT((flag & ~(SACC|SUPD|SCHG)) == 0);
    710 	if (((flag & sp->s_flag) != flag) ||
    711 	    ((flag & SACC) && (sp->s_atime != now)) ||
    712 	    ((flag & SUPD) && (sp->s_mtime != now)) ||
    713 	    ((flag & SCHG) && (sp->s_ctime != now))) {
    714 		/* lock and update */
    715 		mutex_enter(&sp->s_lock);
    716 		sp->s_flag |= flag;
    717 		if (flag & SACC)
    718 			sp->s_atime = now;
    719 		if (flag & SUPD)
    720 			sp->s_mtime = now;
    721 		if (flag & SCHG)
    722 			sp->s_ctime = now;
    723 		mutex_exit(&sp->s_lock);
    724 	}
    725 }
    726 
    727 /*
    728  * Return the maximum file offset permitted for this device.
    729  * -1 means unrestricted.  SLOFFSET is associated with D_64BIT.
    730  *
    731  * On a 32-bit kernel this will limit:
    732  *   o	D_64BIT devices to SPEC_MAXOFFSET_T.
    733  *   o	non-D_64BIT character drivers to a 32-bit offset (MAXOFF_T).
    734  */
    735 offset_t
    736 spec_maxoffset(struct vnode *vp)
    737 {
    738 	struct snode *sp = VTOS(vp);
    739 	struct snode *csp = VTOS(sp->s_commonvp);
    740 
    741 	if (vp->v_stream)
    742 		return ((offset_t)-1);
    743 	else if (csp->s_flag & SANYOFFSET)	/* D_U64BIT */
    744 		return ((offset_t)-1);
    745 #ifdef _ILP32
    746 	if (csp->s_flag & SLOFFSET)		/* D_64BIT */
    747 		return (SPEC_MAXOFFSET_T);
    748 #endif	/* _ILP32 */
    749 	return (MAXOFF_T);
    750 }
    751 
    752 /*ARGSUSED*/
    753 static int
    754 snode_constructor(void *buf, void *cdrarg, int kmflags)
    755 {
    756 	struct snode *sp = buf;
    757 	struct vnode *vp;
    758 
    759 	vp = sp->s_vnode = vn_alloc(kmflags);
    760 	if (vp == NULL) {
    761 		return (-1);
    762 	}
    763 	vn_setops(vp, spec_getvnodeops());
    764 	vp->v_data = sp;
    765 
    766 	mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
    767 	cv_init(&sp->s_cv, NULL, CV_DEFAULT, NULL);
    768 	return (0);
    769 }
    770 
    771 /*ARGSUSED1*/
    772 static void
    773 snode_destructor(void *buf, void *cdrarg)
    774 {
    775 	struct snode *sp = buf;
    776 	struct vnode *vp = STOV(sp);
    777 
    778 	mutex_destroy(&sp->s_lock);
    779 	cv_destroy(&sp->s_cv);
    780 
    781 	vn_free(vp);
    782 }
    783 
    784 
    785 int
    786 specinit(int fstype, char *name)
    787 {
    788 	static const fs_operation_def_t spec_vfsops_template[] = {
    789 		VFSNAME_SYNC, { .vfs_sync = spec_sync },
    790 		NULL, NULL
    791 	};
    792 	extern struct vnodeops *spec_vnodeops;
    793 	extern const fs_operation_def_t spec_vnodeops_template[];
    794 	struct vfsops *spec_vfsops;
    795 	int error;
    796 	dev_t dev;
    797 
    798 	/*
    799 	 * Associate vfs and vnode operations.
    800 	 */
    801 	error = vfs_setfsops(fstype, spec_vfsops_template, &spec_vfsops);
    802 	if (error != 0) {
    803 		cmn_err(CE_WARN, "specinit: bad vfs ops template");
    804 		return (error);
    805 	}
    806 
    807 	error = vn_make_ops(name, spec_vnodeops_template, &spec_vnodeops);
    808 	if (error != 0) {
    809 		(void) vfs_freevfsops_by_type(fstype);
    810 		cmn_err(CE_WARN, "specinit: bad vnode ops template");
    811 		return (error);
    812 	}
    813 
    814 	mutex_init(&stable_lock, NULL, MUTEX_DEFAULT, NULL);
    815 	mutex_init(&spec_syncbusy, NULL, MUTEX_DEFAULT, NULL);
    816 
    817 	/*
    818 	 * Create snode cache
    819 	 */
    820 	snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode),
    821 	    0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0);
    822 
    823 	/*
    824 	 * Associate vfs operations with spec_vfs
    825 	 */
    826 	VFS_INIT(&spec_vfs, spec_vfsops, (caddr_t)NULL);
    827 	if ((dev = getudev()) == -1)
    828 		dev = 0;
    829 	specdev = makedevice(dev, 0);
    830 	return (0);
    831 }
    832 
    833 int
    834 device_close(struct vnode *vp, int flag, struct cred *cr)
    835 {
    836 	struct snode *sp = VTOS(vp);
    837 	enum vtype type = vp->v_type;
    838 	struct vnode *cvp;
    839 	dev_t dev;
    840 	int error;
    841 
    842 	dev = sp->s_dev;
    843 	cvp = sp->s_commonvp;
    844 
    845 	switch (type) {
    846 
    847 	case VCHR:
    848 		if (vp->v_stream) {
    849 			if (cvp->v_stream != NULL)
    850 				error = strclose(cvp, flag, cr);
    851 			vp->v_stream = NULL;
    852 		} else
    853 			error = dev_close(dev, flag, OTYP_CHR, cr);
    854 		break;
    855 
    856 	case VBLK:
    857 		/*
    858 		 * On last close a block device we must
    859 		 * invalidate any in-core blocks so that we
    860 		 * can, for example, change floppy disks.
    861 		 */
    862 		(void) spec_putpage(cvp, (offset_t)0,
    863 		    (size_t)0, B_INVAL|B_FORCE, cr, NULL);
    864 		bflush(dev);
    865 		binval(dev);
    866 		error = dev_close(dev, flag, OTYP_BLK, cr);
    867 		break;
    868 	default:
    869 		panic("device_close: not a device");
    870 		/*NOTREACHED*/
    871 	}
    872 
    873 	return (error);
    874 }
    875 
    876 struct vnode *
    877 makectty(vnode_t *ovp)
    878 {
    879 	vnode_t *vp;
    880 
    881 	if (vp = makespecvp(ovp->v_rdev, VCHR)) {
    882 		struct snode *sp;
    883 		struct snode *csp;
    884 		struct vnode *cvp;
    885 
    886 		sp = VTOS(vp);
    887 		cvp = sp->s_commonvp;
    888 		csp = VTOS(cvp);
    889 		mutex_enter(&csp->s_lock);
    890 		csp->s_count++;
    891 		mutex_exit(&csp->s_lock);
    892 	}
    893 
    894 	return (vp);
    895 }
    896 
    897 void
    898 spec_snode_walk(int (*callback)(struct snode *sp, void *arg), void *arg)
    899 {
    900 	struct snode	*sp;
    901 	int		i;
    902 
    903 	ASSERT(callback);
    904 
    905 	mutex_enter(&stable_lock);
    906 	for (i = 0; i < STABLESIZE; i++) {
    907 		for (sp = stable[i]; sp; sp = sp->s_next) {
    908 			if (callback(sp, arg) != DDI_WALK_CONTINUE)
    909 				goto out;
    910 		}
    911 	}
    912 out:
    913 	mutex_exit(&stable_lock);
    914 }
    915 
    916 int
    917 spec_is_clone(vnode_t *vp)
    918 {
    919 	struct snode *sp;
    920 
    921 	if (vn_matchops(vp, spec_getvnodeops())) {
    922 		sp = VTOS(vp);
    923 		return ((sp->s_flag & SCLONE) ? 1 : 0);
    924 	}
    925 
    926 	return (0);
    927 }
    928 
    929 int
    930 spec_is_selfclone(vnode_t *vp)
    931 {
    932 	struct snode *sp;
    933 
    934 	if (vn_matchops(vp, spec_getvnodeops())) {
    935 		sp = VTOS(vp);
    936 		return ((sp->s_flag & SSELFCLONE) ? 1 : 0);
    937 	}
    938 
    939 	return (0);
    940 }
    941 
    942 /*
    943  * We may be invoked with a NULL vp in which case we fence off
    944  * all snodes associated with dip
    945  */
    946 int
    947 spec_fence_snode(dev_info_t *dip, struct vnode *vp)
    948 {
    949 	struct snode	*sp;
    950 	struct snode	*csp;
    951 	int		retired;
    952 	int		i;
    953 	char		*path;
    954 	int		emitted;
    955 
    956 	ASSERT(dip);
    957 
    958 	retired = 0;
    959 	mutex_enter(&DEVI(dip)->devi_lock);
    960 	if (DEVI(dip)->devi_flags & DEVI_RETIRED)
    961 		retired = 1;
    962 	mutex_exit(&DEVI(dip)->devi_lock);
    963 
    964 	if (!retired)
    965 		return (0);
    966 
    967 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    968 	(void) ddi_pathname(dip, path);
    969 
    970 
    971 	if (vp != NULL) {
    972 		ASSERT(vn_matchops(vp, spec_getvnodeops()));
    973 		csp = VTOCS(vp);
    974 		ASSERT(csp);
    975 		mutex_enter(&csp->s_lock);
    976 		csp->s_flag |= SFENCED;
    977 		mutex_exit(&csp->s_lock);
    978 		FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
    979 		    (void *)csp, path));
    980 		kmem_free(path, MAXPATHLEN);
    981 		return (0);
    982 	}
    983 
    984 	emitted = 0;
    985 	mutex_enter(&stable_lock);
    986 	for (i = 0; i < STABLESIZE; i++) {
    987 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
    988 			ASSERT(sp->s_commonvp);
    989 			csp = VTOS(sp->s_commonvp);
    990 			if (csp->s_dip == dip) {
    991 				/* fence off the common snode */
    992 				mutex_enter(&csp->s_lock);
    993 				csp->s_flag |= SFENCED;
    994 				mutex_exit(&csp->s_lock);
    995 				if (!emitted) {
    996 					FENDBG((CE_NOTE, "fenced 1 of N"));
    997 					emitted++;
    998 				}
    999 			}
   1000 		}
   1001 	}
   1002 	mutex_exit(&stable_lock);
   1003 
   1004 	FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
   1005 	kmem_free(path, MAXPATHLEN);
   1006 
   1007 	return (0);
   1008 }
   1009 
   1010 
   1011 int
   1012 spec_unfence_snode(dev_info_t *dip)
   1013 {
   1014 	struct snode	*sp;
   1015 	struct snode	*csp;
   1016 	int		i;
   1017 	char		*path;
   1018 	int		emitted;
   1019 
   1020 	ASSERT(dip);
   1021 
   1022 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1023 	(void) ddi_pathname(dip, path);
   1024 
   1025 	emitted = 0;
   1026 	mutex_enter(&stable_lock);
   1027 	for (i = 0; i < STABLESIZE; i++) {
   1028 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
   1029 			ASSERT(sp->s_commonvp);
   1030 			csp = VTOS(sp->s_commonvp);
   1031 			ASSERT(csp);
   1032 			if (csp->s_dip == dip) {
   1033 				/* unfence the common snode */
   1034 				mutex_enter(&csp->s_lock);
   1035 				csp->s_flag &= ~SFENCED;
   1036 				mutex_exit(&csp->s_lock);
   1037 				if (!emitted) {
   1038 					FENDBG((CE_NOTE, "unfenced 1 of N"));
   1039 					emitted++;
   1040 				}
   1041 			}
   1042 		}
   1043 	}
   1044 	mutex_exit(&stable_lock);
   1045 
   1046 	FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
   1047 	kmem_free(path, MAXPATHLEN);
   1048 
   1049 	return (0);
   1050 }
   1051 
   1052 void
   1053 spec_size_invalidate(dev_t dev, vtype_t type)
   1054 {
   1055 
   1056 	struct snode *csp;
   1057 
   1058 	mutex_enter(&stable_lock);
   1059 	if ((csp = sfind(dev, type, NULL)) != NULL) {
   1060 		mutex_enter(&csp->s_lock);
   1061 		csp->s_flag &= ~SSIZEVALID;
   1062 		VN_RELE(STOV(csp));
   1063 		mutex_exit(&csp->s_lock);
   1064 	}
   1065 	mutex_exit(&stable_lock);
   1066 }
   1067