Home | History | Annotate | Download | only in fs
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1488       rsb  * Common Development and Distribution License (the "License").
      6   1488       rsb  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*
     22   9321      Neil  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0    stevel  * Use is subject to license terms.
     24      0    stevel  */
     25      0    stevel 
     26      0    stevel /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     27      0    stevel /*	  All Rights Reserved  	*/
     28      0    stevel 
     29      0    stevel /*
     30      0    stevel  * University Copyright- Copyright (c) 1982, 1986, 1988
     31      0    stevel  * The Regents of the University of California
     32      0    stevel  * All Rights Reserved
     33      0    stevel  *
     34      0    stevel  * University Acknowledgment- Portions of this document are derived from
     35      0    stevel  * software developed by the University of California, Berkeley, and its
     36      0    stevel  * contributors.
     37      0    stevel  */
     38      0    stevel 
     39      0    stevel #include <sys/types.h>
     40      0    stevel #include <sys/param.h>
     41      0    stevel #include <sys/t_lock.h>
     42      0    stevel #include <sys/errno.h>
     43      0    stevel #include <sys/cred.h>
     44      0    stevel #include <sys/user.h>
     45      0    stevel #include <sys/uio.h>
     46      0    stevel #include <sys/file.h>
     47      0    stevel #include <sys/pathname.h>
     48      0    stevel #include <sys/vfs.h>
     49   3898       rsb #include <sys/vfs_opreg.h>
     50      0    stevel #include <sys/vnode.h>
     51      0    stevel #include <sys/rwstlock.h>
     52      0    stevel #include <sys/fem.h>
     53      0    stevel #include <sys/stat.h>
     54      0    stevel #include <sys/mode.h>
     55      0    stevel #include <sys/conf.h>
     56      0    stevel #include <sys/sysmacros.h>
     57      0    stevel #include <sys/cmn_err.h>
     58      0    stevel #include <sys/systm.h>
     59      0    stevel #include <sys/kmem.h>
     60      0    stevel #include <sys/debug.h>
     61      0    stevel #include <c2/audit.h>
     62      0    stevel #include <sys/acl.h>
     63      0    stevel #include <sys/nbmlock.h>
     64      0    stevel #include <sys/fcntl.h>
     65      0    stevel #include <fs/fs_subr.h>
     66   9321      Neil #include <sys/taskq.h>
     67  10793       dai #include <fs/fs_reparse.h>
     68      0    stevel 
     /*
      * True when vp is read-only according to vn_is_readonly() and is not a
      * character device, block device, or FIFO.  vn_is_readonly() is only
      * consulted once the three type checks have passed.
      */
     #define	ISROFILE(vp)	\
     	(!((vp)->v_type == VCHR || (vp)->v_type == VBLK || \
     	    (vp)->v_type == VFIFO) && vn_is_readonly(vp))
     73    766  carlsonj 
     74    766  carlsonj /* Tunable via /etc/system; used only by admin/install */
     75    766  carlsonj int nfs_global_client_only;
     76   1488       rsb 
     77   1488       rsb /*
     78   1488       rsb  * Array of vopstats_t for per-FS-type vopstats.  This array has the same
     79   1488       rsb  * number of entries as and parallel to the vfssw table.  (Arguably, it could
     80   1488       rsb  * be part of the vfssw table.)  Once it's initialized, it's accessed using
     81   1488       rsb  * the same fstype index that is used to index into the vfssw table.
     82   1488       rsb  */
     83   1488       rsb vopstats_t **vopstats_fstype;
     84   1488       rsb 
     85   1488       rsb /* vopstats template, built once by vopstats_startup(); bcopy()ed for fast init */
     86   1488       rsb static vopstats_t *vs_templatep;
     87   1488       rsb 
     88   1488       rsb /* Kmem cache handle for vsk_anchor_t allocations */
     89   1488       rsb kmem_cache_t *vsk_anchor_cache;
     90   4863     praks 
     91   4863     praks /* file events cleanup routine */
     92   4863     praks extern void free_fopdata(vnode_t *);
     93   1488       rsb 
     94   1488       rsb /*
     95   1488       rsb  * Root of AVL tree for the kstats associated with vopstats.  Lock protects
     96   1488       rsb  * updates to vskstat_tree.
     97   1488       rsb  */
     98   1488       rsb avl_tree_t	vskstat_tree;
     99   1488       rsb kmutex_t	vskstat_tree_lock;
    100   1488       rsb 
    101   1488       rsb /* Nonzero enables vopstats collection; zero disables it */
    102   1488       rsb int vopstats_enabled = 1;
    103   5050   jwahlig 
    104   5050   jwahlig /*
    105   5050   jwahlig  * forward declarations for internal vnode specific data (vsd)
    106   5050   jwahlig  */
    107   5050   jwahlig static void *vsd_realloc(void *, size_t, size_t);
    108  10793       dai 
    109  10793       dai /*
    110  10793       dai  * forward declarations for reparse point functions
    111  10793       dai  */
    112  10793       dai static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
    113   5050   jwahlig 
    114   5050   jwahlig /*
    115   5050   jwahlig  * VSD -- VNODE SPECIFIC DATA
    116   5050   jwahlig  * The v_data pointer is typically used by a file system to store a
    117   5050   jwahlig  * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
    118   5050   jwahlig  * However, there are times when additional project private data needs
    119   5050   jwahlig  * to be stored separately from the data (node) pointed to by v_data.
    120   5050   jwahlig  * This additional data could be stored by the file system itself or
    121   5050   jwahlig  * by a completely different kernel entity.  VSD provides a way for
    122   5050   jwahlig  * callers to obtain a key and store a pointer to private data associated
    123   5050   jwahlig  * with a vnode.
    124   5050   jwahlig  *
    125   9885    Robert  * Callers are responsible for protecting the vsd by holding v_vsd_lock
    126   5050   jwahlig  * for calls to vsd_set() and vsd_get().
    127   5050   jwahlig  */
    128   5050   jwahlig 
    129   5050   jwahlig /*
    130   5050   jwahlig  * vsd_lock protects:
    131   5050   jwahlig  *   vsd_nkeys - creation and deletion of vsd keys
    132   5050   jwahlig  *   vsd_list - insertion and deletion of vsd_node in the vsd_list
    133   5050   jwahlig  *   vsd_destructor - adding and removing destructors to the list
    134   5050   jwahlig  */
    135   5050   jwahlig static kmutex_t		vsd_lock;
    136   5050   jwahlig static uint_t		vsd_nkeys;	 /* size of destructor array */
    137   5050   jwahlig /* list of vsd_node's */
    138   5050   jwahlig static list_t *vsd_list = NULL;
    139   5050   jwahlig /* per-key destructor funcs */
    140   5050   jwahlig static void 		(**vsd_destructor)(void *);
    141   1488       rsb 
    /*
     * Common bookkeeping for recording a vnode op in the vopstats structures.
     * VOPSTATS_UPDATE() counts one operation; VOPSTATS_UPDATE_IO() does the
     * same and additionally accumulates the number of bytes transferred.  The
     * two bodies are deliberate near-duplicates, so a change to one usually
     * needs to be mirrored in the other.
     *
     * Arguments:
     * vp - Pointer to the vnode (evaluated exactly once)
     * counter - Partial name of the vopstats member to bump; "n" is prepended
     * bytecounter - Name of the vopstats member that accumulates bytes
     * bytesval - Number of bytes to add (evaluated exactly once)
     *
     * Both macros do nothing unless the vnode's vfs pointer is non-NULL, its
     * vfs_implp is set, VFS_STATS is set in vfs_flag, and the vnode type is
     * not VBAD.  Otherwise they fire the __dtrace_probe___fsinfo_<counter>
     * probe, then bump the per-vfs counter(s), and finally bump the same
     * counter(s) in the per-fstype vopstats (vfs_fstypevsp) if one is attached.
     *
     * The expansion is a plain brace block, not do { } while (0), so that
     * existing call sites keep compiling unchanged.  Block-local names carry
     * a vsu_ prefix so they cannot capture identifiers used in the arguments.
     */

    #define	VOPSTATS_UPDATE(vp, counter) {					\
    	vnode_t *vsu_vp = (vp);						\
    	vfs_t *vsu_vfsp = vsu_vp->v_vfsp;				\
    	if (vsu_vfsp != NULL && vsu_vfsp->vfs_implp &&			\
    	    (vsu_vfsp->vfs_flag & VFS_STATS) &&				\
    	    vsu_vp->v_type != VBAD) {					\
    		vopstats_t *vsu_vsp = &vsu_vfsp->vfs_vopstats;		\
    		uint64_t *vsu_stataddr =				\
    		    &(vsu_vsp->n##counter.value.ui64);			\
    		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
    		    size_t, uint64_t *);				\
    		/* probe sees the counter before it is incremented */	\
    		__dtrace_probe___fsinfo_##counter(vsu_vp, 0,		\
    		    vsu_stataddr);					\
    		(*vsu_stataddr)++;					\
    		if ((vsu_vsp = vsu_vfsp->vfs_fstypevsp) != NULL) {	\
    			vsu_vsp->n##counter.value.ui64++;		\
    		}							\
    	}								\
    }

    #define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
    	vnode_t *vsu_vp = (vp);						\
    	uint64_t vsu_nbytes = (bytesval);				\
    	vfs_t *vsu_vfsp = vsu_vp->v_vfsp;				\
    	if (vsu_vfsp != NULL && vsu_vfsp->vfs_implp &&			\
    	    (vsu_vfsp->vfs_flag & VFS_STATS) &&				\
    	    vsu_vp->v_type != VBAD) {					\
    		vopstats_t *vsu_vsp = &vsu_vfsp->vfs_vopstats;		\
    		uint64_t *vsu_stataddr =				\
    		    &(vsu_vsp->n##counter.value.ui64);			\
    		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
    		    size_t, uint64_t *);				\
    		/* probe sees the counter before it is incremented */	\
    		__dtrace_probe___fsinfo_##counter(vsu_vp, vsu_nbytes,	\
    		    vsu_stataddr);					\
    		(*vsu_stataddr)++;					\
    		vsu_vsp->bytecounter.value.ui64 += vsu_nbytes;		\
    		if ((vsu_vsp = vsu_vfsp->vfs_fstypevsp) != NULL) {	\
    			vsu_vsp->n##counter.value.ui64++;		\
    			vsu_vsp->bytecounter.value.ui64 += vsu_nbytes;	\
    		}							\
    	}								\
    }
    191   4321    casper 
    /*
     * Replace cr with crgetmapped(cr) when the vnode's filesystem does not
     * have VFS_XID set in vfs_flag, i.e. does not support XIDs.  A NULL
     * v_vfsp leaves cr untouched; whether that case should map as well is an
     * open question.  cr is assigned to, so it must be a modifiable lvalue.
     */
    #define	VOPXID_MAP_CR(vp, cr)	{					\
    	vfs_t *xid_vfsp = (vp)->v_vfsp;					\
    	if (xid_vfsp != NULL) {						\
    		if (!(xid_vfsp->vfs_flag & VFS_XID)) {			\
    			cr = crgetmapped(cr);				\
    		}							\
    	}								\
    }
    201      0    stevel 
    202      0    stevel /*
    203      0    stevel  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
    204      0    stevel  * numerical order of S_IFMT and vnode types.)
    205      0    stevel  */
    206      0    stevel enum vtype iftovt_tab[] = {
    207      0    stevel 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
    208      0    stevel 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
    209      0    stevel };
    210      0    stevel 
    211      0    stevel ushort_t vttoif_tab[] = {
    212      0    stevel 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
    213      0    stevel 	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
    214      0    stevel };
    215      0    stevel 
    216      0    stevel /*
    217      0    stevel  * Kmem cache handle for the system vnode cache.
    218      0    stevel  */
    219      0    stevel 
    220      0    stevel kmem_cache_t *vn_cache;
    221      0    stevel 
    222      0    stevel 
    223      0    stevel /*
    224      0    stevel  * Vnode operations vector.
    225      0    stevel  */
    226      0    stevel 
    227      0    stevel static const fs_operation_trans_def_t vn_ops_table[] = {
    228      0    stevel 	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
    229      0    stevel 	    fs_nosys, fs_nosys,
    230      0    stevel 
    231      0    stevel 	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
    232      0    stevel 	    fs_nosys, fs_nosys,
    233      0    stevel 
    234      0    stevel 	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
    235      0    stevel 	    fs_nosys, fs_nosys,
    236      0    stevel 
    237      0    stevel 	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
    238      0    stevel 	    fs_nosys, fs_nosys,
    239      0    stevel 
    240      0    stevel 	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
    241      0    stevel 	    fs_nosys, fs_nosys,
    242      0    stevel 
    243      0    stevel 	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
    244      0    stevel 	    fs_setfl, fs_nosys,
    245      0    stevel 
    246      0    stevel 	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
    247      0    stevel 	    fs_nosys, fs_nosys,
    248      0    stevel 
    249      0    stevel 	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
    250      0    stevel 	    fs_nosys, fs_nosys,
    251      0    stevel 
    252      0    stevel 	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
    253      0    stevel 	    fs_nosys, fs_nosys,
    254      0    stevel 
    255      0    stevel 	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
    256      0    stevel 	    fs_nosys, fs_nosys,
    257      0    stevel 
    258      0    stevel 	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
    259      0    stevel 	    fs_nosys, fs_nosys,
    260      0    stevel 
    261      0    stevel 	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
    262      0    stevel 	    fs_nosys, fs_nosys,
    263      0    stevel 
    264      0    stevel 	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
    265      0    stevel 	    fs_nosys, fs_nosys,
    266      0    stevel 
    267      0    stevel 	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
    268      0    stevel 	    fs_nosys, fs_nosys,
    269      0    stevel 
    270      0    stevel 	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
    271      0    stevel 	    fs_nosys, fs_nosys,
    272      0    stevel 
    273      0    stevel 	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
    274      0    stevel 	    fs_nosys, fs_nosys,
    275      0    stevel 
    276      0    stevel 	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
    277      0    stevel 	    fs_nosys, fs_nosys,
    278      0    stevel 
    279      0    stevel 	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
    280      0    stevel 	    fs_nosys, fs_nosys,
    281      0    stevel 
    282      0    stevel 	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
    283      0    stevel 	    fs_nosys, fs_nosys,
    284      0    stevel 
    285      0    stevel 	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
    286      0    stevel 	    fs_nosys, fs_nosys,
    287      0    stevel 
    288      0    stevel 	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
    289      0    stevel 	    fs_nosys, fs_nosys,
    290      0    stevel 
    291      0    stevel 	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
    292      0    stevel 	    fs_nosys, fs_nosys,
    293      0    stevel 
    294      0    stevel 	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
    295      0    stevel 	    fs_rwlock, fs_rwlock,
    296      0    stevel 
    297      0    stevel 	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
    298      0    stevel 	    (fs_generic_func_p) fs_rwunlock,
    299      0    stevel 	    (fs_generic_func_p) fs_rwunlock,	/* no errors allowed */
    300      0    stevel 
    301      0    stevel 	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
    302      0    stevel 	    fs_nosys, fs_nosys,
    303      0    stevel 
    304      0    stevel 	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
    305      0    stevel 	    fs_cmp, fs_cmp,		/* no errors allowed */
    306      0    stevel 
    307      0    stevel 	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
    308      0    stevel 	    fs_frlock, fs_nosys,
    309      0    stevel 
    310      0    stevel 	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
    311      0    stevel 	    fs_nosys, fs_nosys,
    312      0    stevel 
    313      0    stevel 	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
    314      0    stevel 	    fs_nosys, fs_nosys,
    315      0    stevel 
    316      0    stevel 	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
    317      0    stevel 	    fs_nosys, fs_nosys,
    318      0    stevel 
    319      0    stevel 	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
    320      0    stevel 	    fs_nosys, fs_nosys,
    321      0    stevel 
    322      0    stevel 	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
    323      0    stevel 	    (fs_generic_func_p) fs_nosys_map,
    324      0    stevel 	    (fs_generic_func_p) fs_nosys_map,
    325      0    stevel 
    326      0    stevel 	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
    327      0    stevel 	    (fs_generic_func_p) fs_nosys_addmap,
    328      0    stevel 	    (fs_generic_func_p) fs_nosys_addmap,
    329      0    stevel 
    330      0    stevel 	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
    331      0    stevel 	    fs_nosys, fs_nosys,
    332      0    stevel 
    333      0    stevel 	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
    334      0    stevel 	    (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
    335      0    stevel 
    336      0    stevel 	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
    337      0    stevel 	    fs_nosys, fs_nosys,
    338      0    stevel 
    339      0    stevel 	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
    340      0    stevel 	    fs_pathconf, fs_nosys,
    341      0    stevel 
    342      0    stevel 	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
    343      0    stevel 	    fs_nosys, fs_nosys,
    344      0    stevel 
    345      0    stevel 	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
    346      0    stevel 	    fs_nosys, fs_nosys,
    347      0    stevel 
    348      0    stevel 	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
    349      0    stevel 	    (fs_generic_func_p) fs_dispose,
    350      0    stevel 	    (fs_generic_func_p) fs_nodispose,
    351      0    stevel 
    352      0    stevel 	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
    353      0    stevel 	    fs_nosys, fs_nosys,
    354      0    stevel 
    355      0    stevel 	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
    356      0    stevel 	    fs_fab_acl, fs_nosys,
    357      0    stevel 
    358      0    stevel 	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
    359      0    stevel 	    fs_shrlock, fs_nosys,
    360      0    stevel 
    361      0    stevel 	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
    362      0    stevel 	    (fs_generic_func_p) fs_vnevent_nosupport,
    363      0    stevel 	    (fs_generic_func_p) fs_vnevent_nosupport,
    364      0    stevel 
    365      0    stevel 	NULL, 0, NULL, NULL
    366      0    stevel };
    367   5331       amw 
    368   5331       amw /* Extensible attribute (xva) routines. */
    369   5331       amw 
    370   5331       amw /*
    371   5331       amw  * Zero out the structure, set the size of the requested/returned bitmaps,
    372   5331       amw  * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
    373   5331       amw  * to the returned attributes array.
    374   5331       amw  */
    375   5331       amw void
    376   5331       amw xva_init(xvattr_t *xvap)
    377   5331       amw {
    378   5331       amw 	bzero(xvap, sizeof (xvattr_t));
    379   5331       amw 	xvap->xva_mapsize = XVA_MAPSIZE;
    380   5331       amw 	xvap->xva_magic = XVA_MAGIC;
    381   5331       amw 	xvap->xva_vattr.va_mask = AT_XVATTR;
    382   5331       amw 	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
    383   5331       amw }
    384   5331       amw 
    385   5331       amw /*
    386   5331       amw  * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
    387   5331       amw  * structure.  Otherwise, returns NULL.
    388   5331       amw  */
    389   5331       amw xoptattr_t *
    390   5331       amw xva_getxoptattr(xvattr_t *xvap)
    391   5331       amw {
    392   5331       amw 	xoptattr_t *xoap = NULL;
    393   5331       amw 	if (xvap->xva_vattr.va_mask & AT_XVATTR)
    394   5331       amw 		xoap = &xvap->xva_xoptattrs;
    395   5331       amw 	return (xoap);
    396   5331       amw }
    397      0    stevel 
    398   1488       rsb /*
    399   1488       rsb  * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
    400   1488       rsb  * We use the f_fsid reported by VFS_STATVFS() since we use that for the
    401   1488       rsb  * kstat name.
    402   1488       rsb  */
    403   1488       rsb static int
    404   1488       rsb vska_compar(const void *n1, const void *n2)
    405   1488       rsb {
    406   1488       rsb 	int ret;
    407   1488       rsb 	ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
    408   1488       rsb 	ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
    409   1488       rsb 
    410   1488       rsb 	if (p1 < p2) {
    411   1488       rsb 		ret = -1;
    412   1488       rsb 	} else if (p1 > p2) {
    413   1488       rsb 		ret = 1;
    414   1488       rsb 	} else {
    415   1488       rsb 		ret = 0;
    416   1488       rsb 	}
    417   1488       rsb 
    418   1488       rsb 	return (ret);
    419   1488       rsb }
    420   1488       rsb 
    421   1488       rsb /*
    422   1488       rsb  * Used to create a single template which will be bcopy()ed to a newly
    423   1488       rsb  * allocated vsanchor_combo_t structure in new_vsanchor(), below.
    424   1488       rsb  */
    425   1488       rsb static vopstats_t *
    426   1488       rsb create_vopstats_template()
    427   1488       rsb {
    428   1488       rsb 	vopstats_t		*vsp;
    429   1488       rsb 
    430   1488       rsb 	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
    431   1488       rsb 	bzero(vsp, sizeof (*vsp));	/* Start fresh */
    432   1488       rsb 
    433   1488       rsb 	/* VOP_OPEN */
    434   1488       rsb 	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
    435   1488       rsb 	/* VOP_CLOSE */
    436   1488       rsb 	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
    437   1488       rsb 	/* VOP_READ I/O */
    438   1488       rsb 	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
    439   1488       rsb 	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
    440   1488       rsb 	/* VOP_WRITE I/O */
    441   1488       rsb 	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
    442   1488       rsb 	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
    443   1488       rsb 	/* VOP_IOCTL */
    444   1488       rsb 	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
    445   1488       rsb 	/* VOP_SETFL */
    446   1488       rsb 	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
    447   1488       rsb 	/* VOP_GETATTR */
    448   1488       rsb 	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
    449   1488       rsb 	/* VOP_SETATTR */
    450   1488       rsb 	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
    451   1488       rsb 	/* VOP_ACCESS */
    452   1488       rsb 	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
    453   1488       rsb 	/* VOP_LOOKUP */
    454   1488       rsb 	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
    455   1488       rsb 	/* VOP_CREATE */
    456   1488       rsb 	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
    457   1488       rsb 	/* VOP_REMOVE */
    458   1488       rsb 	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
    459   1488       rsb 	/* VOP_LINK */
    460   1488       rsb 	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
    461   1488       rsb 	/* VOP_RENAME */
    462   1488       rsb 	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
    463   1488       rsb 	/* VOP_MKDIR */
    464   1488       rsb 	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
    465   1488       rsb 	/* VOP_RMDIR */
    466   1488       rsb 	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
    467   1488       rsb 	/* VOP_READDIR I/O */
    468   1488       rsb 	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
    469   1488       rsb 	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
    470   1488       rsb 	    KSTAT_DATA_UINT64);
    471   1488       rsb 	/* VOP_SYMLINK */
    472   1488       rsb 	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
    473   1488       rsb 	/* VOP_READLINK */
    474   1488       rsb 	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
    475   1488       rsb 	/* VOP_FSYNC */
    476   1488       rsb 	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
    477   1488       rsb 	/* VOP_INACTIVE */
    478   1488       rsb 	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
    479   1488       rsb 	/* VOP_FID */
    480   1488       rsb 	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
    481   1488       rsb 	/* VOP_RWLOCK */
    482   1488       rsb 	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
    483   1488       rsb 	/* VOP_RWUNLOCK */
    484   1488       rsb 	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
    485   1488       rsb 	/* VOP_SEEK */
    486   1488       rsb 	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
    487   1488       rsb 	/* VOP_CMP */
    488   1488       rsb 	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
    489   1488       rsb 	/* VOP_FRLOCK */
    490   1488       rsb 	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
    491   1488       rsb 	/* VOP_SPACE */
    492   1488       rsb 	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
    493   1488       rsb 	/* VOP_REALVP */
    494   1488       rsb 	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
    495   1488       rsb 	/* VOP_GETPAGE */
    496   1488       rsb 	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
    497   1488       rsb 	/* VOP_PUTPAGE */
    498   1488       rsb 	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
    499   1488       rsb 	/* VOP_MAP */
    500   1488       rsb 	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
    501   1488       rsb 	/* VOP_ADDMAP */
    502   1488       rsb 	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
    503   1488       rsb 	/* VOP_DELMAP */
    504   1488       rsb 	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
    505   1488       rsb 	/* VOP_POLL */
    506   1488       rsb 	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
    507   1488       rsb 	/* VOP_DUMP */
    508   1488       rsb 	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
    509   1488       rsb 	/* VOP_PATHCONF */
    510   1488       rsb 	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
    511   1488       rsb 	/* VOP_PAGEIO */
    512   1488       rsb 	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
    513   1488       rsb 	/* VOP_DUMPCTL */
    514   1488       rsb 	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
    515   1488       rsb 	/* VOP_DISPOSE */
    516   1488       rsb 	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
    517   1488       rsb 	/* VOP_SETSECATTR */
    518   1488       rsb 	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
    519   1488       rsb 	/* VOP_GETSECATTR */
    520   1488       rsb 	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
    521   1488       rsb 	/* VOP_SHRLOCK */
    522   1488       rsb 	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
    523   1488       rsb 	/* VOP_VNEVENT */
    524   1488       rsb 	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
    525   1488       rsb 
    526   1488       rsb 	return (vsp);
    527   1488       rsb }
    528   1488       rsb 
    529   1488       rsb /*
    530   1488       rsb  * Creates a kstat structure associated with a vopstats structure.
    531   1488       rsb  */
    532   1488       rsb kstat_t *
    533   1488       rsb new_vskstat(char *ksname, vopstats_t *vsp)
    534   1488       rsb {
    535   1488       rsb 	kstat_t		*ksp;
    536   1488       rsb 
    537   1488       rsb 	if (!vopstats_enabled) {
    538   1488       rsb 		return (NULL);
    539   1488       rsb 	}
    540   1488       rsb 
    541   1488       rsb 	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
    542   1488       rsb 	    sizeof (vopstats_t)/sizeof (kstat_named_t),
    543   1488       rsb 	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
    544   1488       rsb 	if (ksp) {
    545   1488       rsb 		ksp->ks_data = vsp;
    546   1488       rsb 		kstat_install(ksp);
    547   1488       rsb 	}
    548   1488       rsb 
    549   1488       rsb 	return (ksp);
    550   1488       rsb }
    551   1488       rsb 
    552   1488       rsb /*
    553   1488       rsb  * Called from vfsinit() to initialize the support mechanisms for vopstats
    554   1488       rsb  */
    555   1488       rsb void
    556   1488       rsb vopstats_startup()
    557   1488       rsb {
    558   1488       rsb 	if (!vopstats_enabled)
    559   1488       rsb 		return;
    560   1488       rsb 
    561   1488       rsb 	/*
    562   1488       rsb 	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
    563   1488       rsb 	 * is necessary since we need to check if a kstat exists before we
    564   1488       rsb 	 * attempt to create it.  Also, initialize its lock.
    565   1488       rsb 	 */
    566   1488       rsb 	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
    567   1488       rsb 	    offsetof(vsk_anchor_t, vsk_node));
    568   1488       rsb 	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
    569   1488       rsb 
    570   1488       rsb 	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
    571   1488       rsb 	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
    572   1488       rsb 	    NULL, NULL, 0);
    573   1488       rsb 
    574   1488       rsb 	/*
    575   1488       rsb 	 * Set up the array of pointers for the vopstats-by-FS-type.
    576   1488       rsb 	 * The entries will be allocated/initialized as each file system
    577   1488       rsb 	 * goes through modload/mod_installfs.
    578   1488       rsb 	 */
    579   1488       rsb 	vopstats_fstype = (vopstats_t **)kmem_zalloc(
    580   1488       rsb 	    (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
    581   1488       rsb 
    582   1488       rsb 	/* Set up the global vopstats initialization template */
    583   1488       rsb 	vs_templatep = create_vopstats_template();
    584   1488       rsb }
    585   1488       rsb 
    586   1488       rsb /*
    587   1488       rsb  * We need to have the all of the counters zeroed.
    588   1488       rsb  * The initialization of the vopstats_t includes on the order of
    589   1488       rsb  * 50 calls to kstat_named_init().  Rather that do that on every call,
    590   1488       rsb  * we do it once in a template (vs_templatep) then bcopy it over.
    591   1488       rsb  */
    592   1488       rsb void
    593   1488       rsb initialize_vopstats(vopstats_t *vsp)
    594   1488       rsb {
    595   1488       rsb 	if (vsp == NULL)
    596   1488       rsb 		return;
    597   1488       rsb 
    598   1488       rsb 	bcopy(vs_templatep, vsp, sizeof (vopstats_t));
    599   1488       rsb }
    600   1488       rsb 
    601   1488       rsb /*
    602   1520       rsb  * If possible, determine which vopstats by fstype to use and
    603   1520       rsb  * return a pointer to the caller.
    604   1488       rsb  */
    605   1520       rsb vopstats_t *
    606   1520       rsb get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
    607   1488       rsb {
    608   1520       rsb 	int		fstype = 0;	/* Index into vfssw[] */
    609   1520       rsb 	vopstats_t	*vsp = NULL;
    610   1488       rsb 
    611   1488       rsb 	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
    612   1488       rsb 	    !vopstats_enabled)
    613   1520       rsb 		return (NULL);
    614   1488       rsb 	/*
    615   1488       rsb 	 * Set up the fstype.  We go to so much trouble because all versions
    616   1488       rsb 	 * of NFS use the same fstype in their vfs even though they have
    617   1488       rsb 	 * distinct entries in the vfssw[] table.
    618   1520       rsb 	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
    619   1488       rsb 	 */
    620   1520       rsb 	if (vswp) {
    621   1520       rsb 		fstype = vswp - vfssw;	/* Gets us the index */
    622   1488       rsb 	} else {
    623   1488       rsb 		fstype = vfsp->vfs_fstype;
    624   1488       rsb 	}
    625   1488       rsb 
    626   1488       rsb 	/*
    627   1488       rsb 	 * Point to the per-fstype vopstats. The only valid values are
    628   1488       rsb 	 * non-zero positive values less than the number of vfssw[] table
    629   1488       rsb 	 * entries.
    630   1488       rsb 	 */
    631   1488       rsb 	if (fstype > 0 && fstype < nfstype) {
    632   1520       rsb 		vsp = vopstats_fstype[fstype];
    633   1488       rsb 	}
    634   1520       rsb 
    635   1520       rsb 	return (vsp);
    636   1520       rsb }
    637   1520       rsb 
    638   1520       rsb /*
    639   1520       rsb  * Generate a kstat name, create the kstat structure, and allocate a
    640   1520       rsb  * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
    641   1520       rsb  * to the caller.  This must only be called from a mount.
    642   1520       rsb  */
    643   1520       rsb vsk_anchor_t *
    644   1520       rsb get_vskstat_anchor(vfs_t *vfsp)
    645   1520       rsb {
    646   1520       rsb 	char		kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
    647   1520       rsb 	statvfs64_t	statvfsbuf;		/* Needed to find f_fsid */
    648   1520       rsb 	vsk_anchor_t	*vskp = NULL;		/* vfs <--> kstat anchor */
    649   1520       rsb 	kstat_t		*ksp;			/* Ptr to new kstat */
    650   1520       rsb 	avl_index_t	where;			/* Location in the AVL tree */
    651   1520       rsb 
    652   1925       rsb 	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
    653   1925       rsb 	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
    654   1520       rsb 		return (NULL);
    655   1488       rsb 
    656   1488       rsb 	/* Need to get the fsid to build a kstat name */
    657   1488       rsb 	if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
    658   1488       rsb 		/* Create a name for our kstats based on fsid */
    659   1488       rsb 		(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
    660   1488       rsb 		    VOPSTATS_STR, statvfsbuf.f_fsid);
    661   1488       rsb 
    662   1488       rsb 		/* Allocate and initialize the vsk_anchor_t */
    663   1488       rsb 		vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
    664   1488       rsb 		bzero(vskp, sizeof (*vskp));
    665   1488       rsb 		vskp->vsk_fsid = statvfsbuf.f_fsid;
    666   1488       rsb 
    667   1488       rsb 		mutex_enter(&vskstat_tree_lock);
    668   1488       rsb 		if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
    669   1488       rsb 			avl_insert(&vskstat_tree, vskp, where);
    670   1488       rsb 			mutex_exit(&vskstat_tree_lock);
    671   1488       rsb 
    672   1488       rsb 			/*
    673   1488       rsb 			 * Now that we've got the anchor in the AVL
    674   1488       rsb 			 * tree, we can create the kstat.
    675   1488       rsb 			 */
    676   1488       rsb 			ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
    677   1488       rsb 			if (ksp) {
    678   1488       rsb 				vskp->vsk_ksp = ksp;
    679   1488       rsb 			}
    680   1488       rsb 		} else {
    681   1488       rsb 			/* Oops, found one! Release memory and lock. */
    682   1488       rsb 			mutex_exit(&vskstat_tree_lock);
    683   1488       rsb 			kmem_cache_free(vsk_anchor_cache, vskp);
    684   1520       rsb 			vskp = NULL;
    685   1488       rsb 		}
    686   1488       rsb 	}
    687   1520       rsb 	return (vskp);
    688   1488       rsb }
    689   1488       rsb 
    690   1488       rsb /*
    691   1488       rsb  * We're in the process of tearing down the vfs and need to cleanup
    692   1488       rsb  * the data structures associated with the vopstats. Must only be called
    693   1488       rsb  * from dounmount().
    694   1488       rsb  */
    695   1488       rsb void
    696   1488       rsb teardown_vopstats(vfs_t *vfsp)
    697   1488       rsb {
    698   1488       rsb 	vsk_anchor_t	*vskap;
    699   1488       rsb 	avl_index_t	where;
    700   1488       rsb 
    701   1925       rsb 	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
    702   1925       rsb 	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
    703   1488       rsb 		return;
    704   1488       rsb 
    705   1488       rsb 	/* This is a safe check since VFS_STATS must be set (see above) */
    706   1488       rsb 	if ((vskap = vfsp->vfs_vskap) == NULL)
    707   1488       rsb 		return;
    708   1488       rsb 
    709   1488       rsb 	/* Whack the pointer right away */
    710   1488       rsb 	vfsp->vfs_vskap = NULL;
    711   1488       rsb 
    712   1488       rsb 	/* Lock the tree, remove the node, and delete the kstat */
    713   1488       rsb 	mutex_enter(&vskstat_tree_lock);
    714   1488       rsb 	if (avl_find(&vskstat_tree, vskap, &where)) {
    715   1488       rsb 		avl_remove(&vskstat_tree, vskap);
    716   1488       rsb 	}
    717   1488       rsb 
    718   1488       rsb 	if (vskap->vsk_ksp) {
    719   1488       rsb 		kstat_delete(vskap->vsk_ksp);
    720   1488       rsb 	}
    721   1488       rsb 	mutex_exit(&vskstat_tree_lock);
    722   1488       rsb 
    723   1488       rsb 	kmem_cache_free(vsk_anchor_cache, vskap);
    724   1488       rsb }
    725      0    stevel 
    726      0    stevel /*
    727      0    stevel  * Read or write a vnode.  Called from kernel code.
    728      0    stevel  */
    729      0    stevel int
    730      0    stevel vn_rdwr(
    731      0    stevel 	enum uio_rw rw,
    732      0    stevel 	struct vnode *vp,
    733      0    stevel 	caddr_t base,
    734      0    stevel 	ssize_t len,
    735      0    stevel 	offset_t offset,
    736      0    stevel 	enum uio_seg seg,
    737      0    stevel 	int ioflag,
    738      0    stevel 	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
    739      0    stevel 	cred_t *cr,
    740      0    stevel 	ssize_t *residp)
    741      0    stevel {
    742      0    stevel 	struct uio uio;
    743      0    stevel 	struct iovec iov;
    744      0    stevel 	int error;
    745      0    stevel 	int in_crit = 0;
    746      0    stevel 
    747      0    stevel 	if (rw == UIO_WRITE && ISROFILE(vp))
    748      0    stevel 		return (EROFS);
    749      0    stevel 
    750      0    stevel 	if (len < 0)
    751      0    stevel 		return (EIO);
    752   4321    casper 
    753   4321    casper 	VOPXID_MAP_CR(vp, cr);
    754      0    stevel 
    755      0    stevel 	iov.iov_base = base;
    756      0    stevel 	iov.iov_len = len;
    757      0    stevel 	uio.uio_iov = &iov;
    758      0    stevel 	uio.uio_iovcnt = 1;
    759      0    stevel 	uio.uio_loffset = offset;
    760      0    stevel 	uio.uio_segflg = (short)seg;
    761      0    stevel 	uio.uio_resid = len;
    762      0    stevel 	uio.uio_llimit = ulimit;
    763      0    stevel 
    764      0    stevel 	/*
    765      0    stevel 	 * We have to enter the critical region before calling VOP_RWLOCK
    766      0    stevel 	 * to avoid a deadlock with ufs.
    767      0    stevel 	 */
    768      0    stevel 	if (nbl_need_check(vp)) {
    769      0    stevel 		int svmand;
    770      0    stevel 
    771      0    stevel 		nbl_start_crit(vp, RW_READER);
    772      0    stevel 		in_crit = 1;
    773      0    stevel 		error = nbl_svmand(vp, cr, &svmand);
    774      0    stevel 		if (error != 0)
    775      0    stevel 			goto done;
    776      0    stevel 		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
    777   5331       amw 		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
    778      0    stevel 			error = EACCES;
    779      0    stevel 			goto done;
    780      0    stevel 		}
    781      0    stevel 	}
    782      0    stevel 
    783      0    stevel 	(void) VOP_RWLOCK(vp,
    784   4956  pf199842 	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
    785      0    stevel 	if (rw == UIO_WRITE) {
    786      0    stevel 		uio.uio_fmode = FWRITE;
    787      0    stevel 		uio.uio_extflg = UIO_COPY_DEFAULT;
    788      0    stevel 		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
    789      0    stevel 	} else {
    790      0    stevel 		uio.uio_fmode = FREAD;
    791      0    stevel 		uio.uio_extflg = UIO_COPY_CACHED;
    792      0    stevel 		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
    793      0    stevel 	}
    794   5331       amw 	VOP_RWUNLOCK(vp,
    795   5331       amw 	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
    796      0    stevel 	if (residp)
    797      0    stevel 		*residp = uio.uio_resid;
    798      0    stevel 	else if (uio.uio_resid)
    799      0    stevel 		error = EIO;
    800      0    stevel 
    801      0    stevel done:
    802      0    stevel 	if (in_crit)
    803      0    stevel 		nbl_end_crit(vp);
    804      0    stevel 	return (error);
    805      0    stevel }
    806      0    stevel 
    807      0    stevel /*
    808      0    stevel  * Release a vnode.  Call VOP_INACTIVE on last reference or
    809      0    stevel  * decrement reference count.
    810      0    stevel  *
    811      0    stevel  * To avoid race conditions, the v_count is left at 1 for
    812      0    stevel  * the call to VOP_INACTIVE. This prevents another thread
    813      0    stevel  * from reclaiming and releasing the vnode *before* the
    814      0    stevel  * VOP_INACTIVE routine has a chance to destroy the vnode.
    815      0    stevel  * We can't have more than 1 thread calling VOP_INACTIVE
    816      0    stevel  * on a vnode.
    817      0    stevel  */
    818      0    stevel void
    819      0    stevel vn_rele(vnode_t *vp)
    820      0    stevel {
    821   6712     tomee 	VERIFY(vp->v_count > 0);
    822      0    stevel 	mutex_enter(&vp->v_lock);
    823      0    stevel 	if (vp->v_count == 1) {
    824      0    stevel 		mutex_exit(&vp->v_lock);
    825   5331       amw 		VOP_INACTIVE(vp, CRED(), NULL);
    826   6712     tomee 		return;
    827   6712     tomee 	}
    828   6712     tomee 	vp->v_count--;
    829   6712     tomee 	mutex_exit(&vp->v_lock);
    830   6712     tomee }
    831   6712     tomee 
    832   6712     tomee /*
    833   6712     tomee  * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
    834   6712     tomee  * as a single reference, so v_count is not decremented until the last DNLC hold
    835   6712     tomee  * is released. This makes it possible to distinguish vnodes that are referenced
    836   6712     tomee  * only by the DNLC.
    837   6712     tomee  */
    838   6712     tomee void
    839   6712     tomee vn_rele_dnlc(vnode_t *vp)
    840   6712     tomee {
    841   6712     tomee 	VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
    842   6712     tomee 	mutex_enter(&vp->v_lock);
    843   6712     tomee 	if (--vp->v_count_dnlc == 0) {
    844   6712     tomee 		if (vp->v_count == 1) {
    845   6712     tomee 			mutex_exit(&vp->v_lock);
    846   6712     tomee 			VOP_INACTIVE(vp, CRED(), NULL);
    847   6712     tomee 			return;
    848   6712     tomee 		}
    849      0    stevel 		vp->v_count--;
    850   6712     tomee 	}
    851   6712     tomee 	mutex_exit(&vp->v_lock);
    852      0    stevel }
    853      0    stevel 
    854      0    stevel /*
    855      0    stevel  * Like vn_rele() except that it clears v_stream under v_lock.
    856      0    stevel  * This is used by sockfs when it dismantels the association between
    857      0    stevel  * the sockfs node and the vnode in the underlaying file system.
    858      0    stevel  * v_lock has to be held to prevent a thread coming through the lookupname
    859      0    stevel  * path from accessing a stream head that is going away.
    860      0    stevel  */
    861      0    stevel void
    862      0    stevel vn_rele_stream(vnode_t *vp)
    863      0    stevel {
    864   6712     tomee 	VERIFY(vp->v_count > 0);
    865      0    stevel 	mutex_enter(&vp->v_lock);
    866      0    stevel 	vp->v_stream = NULL;
    867      0    stevel 	if (vp->v_count == 1) {
    868      0    stevel 		mutex_exit(&vp->v_lock);
    869   5331       amw 		VOP_INACTIVE(vp, CRED(), NULL);
    870   6712     tomee 		return;
    871   6712     tomee 	}
    872   6712     tomee 	vp->v_count--;
    873   6712     tomee 	mutex_exit(&vp->v_lock);
    874      0    stevel }
    875      0    stevel 
    876   9321      Neil static void
    877   9321      Neil vn_rele_inactive(vnode_t *vp)
    878   9321      Neil {
    879   9321      Neil 	VOP_INACTIVE(vp, CRED(), NULL);
    880   9321      Neil }
    881   9321      Neil 
    882   9321      Neil /*
    883   9321      Neil  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
    884   9321      Neil  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
    885   9321      Neil  * the file system as a result of releasing the vnode. Note, file systems
    886   9321      Neil  * already have to handle the race where the vnode is incremented before the
    887   9321      Neil  * inactive routine is called and does its locking.
    888   9321      Neil  *
    889   9321      Neil  * Warning: Excessive use of this routine can lead to performance problems.
    890   9321      Neil  * This is because taskqs throttle back allocation if too many are created.
    891   9321      Neil  */
    892   9321      Neil void
    893   9321      Neil vn_rele_async(vnode_t *vp, taskq_t *taskq)
    894   9321      Neil {
    895   9321      Neil 	VERIFY(vp->v_count > 0);
    896   9321      Neil 	mutex_enter(&vp->v_lock);
    897   9321      Neil 	if (vp->v_count == 1) {
    898   9321      Neil 		mutex_exit(&vp->v_lock);
    899   9321      Neil 		VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
    900   9321      Neil 		    vp, TQ_SLEEP) != NULL);
    901   9321      Neil 		return;
    902   9321      Neil 	}
    903   9321      Neil 	vp->v_count--;
    904   9321      Neil 	mutex_exit(&vp->v_lock);
    905   9321      Neil }
    906   9321      Neil 
    907      0    stevel int
    908      0    stevel vn_open(
    909      0    stevel 	char *pnamep,
    910      0    stevel 	enum uio_seg seg,
    911      0    stevel 	int filemode,
    912      0    stevel 	int createmode,
    913      0    stevel 	struct vnode **vpp,
    914      0    stevel 	enum create crwhy,
    915      0    stevel 	mode_t umask)
    916      0    stevel {
    917   5331       amw 	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
    918   5331       amw 	    umask, NULL, -1));
    919      0    stevel }
    920      0    stevel 
    921      0    stevel 
    922      0    stevel /*
    923      0    stevel  * Open/create a vnode.
    924      0    stevel  * This may be callable by the kernel, the only known use
    925      0    stevel  * of user context being that the current user credentials
    926      0    stevel  * are used for permissions.  crwhy is defined iff filemode & FCREAT.
                          *
                          * With FCREAT set in filemode the vnode comes from vn_createat();
                          * otherwise it is looked up with lookupnameat() and then checked
                          * (large file limit, writability, mandatory locks, access).  startvp
                          * is handed to both of those routines unchanged.  fd is recorded as
                          * the owner id (sl_id) of the share reservation taken on regular
                          * files for which nbl_need_check() is true.
                          *
                          * Returns 0 with the opened vnode stored in *vpp, or an errno value,
                          * in which case any vnode obtained here has been released and *vpp
                          * is not written.  A lookup or open that fails with ESTALE is
                          * restarted from the top for as long as fs_need_estale_retry()
                          * allows.
    927      0    stevel  */
    928      0    stevel int
    929      0    stevel vn_openat(
    930      0    stevel 	char *pnamep,
    931      0    stevel 	enum uio_seg seg,
    932      0    stevel 	int filemode,
    933      0    stevel 	int createmode,
    934      0    stevel 	struct vnode **vpp,
    935      0    stevel 	enum create crwhy,
    936      0    stevel 	mode_t umask,
    937   5331       amw 	struct vnode *startvp,
    938   5331       amw 	int fd)
    939      0    stevel {
    940      0    stevel 	struct vnode *vp;
    941      0    stevel 	int mode;
    942   5331       amw 	int accessflags;
    943      0    stevel 	int error;
    944      0    stevel 	int in_crit = 0;
    945   5331       amw 	int open_done = 0;
    946   5331       amw 	int shrlock_done = 0;
    947      0    stevel 	struct vattr vattr;
    948      0    stevel 	enum symfollow follow;
    949   2051  prabahar 	int estale_retry = 0;
    950   5331       amw 	struct shrlock shr;
    951   5331       amw 	struct shr_locowner shr_own;
    952      0    stevel 
                         	/* Translate the open flags into the access bits to check. */
    953      0    stevel 	mode = 0;
    954   5331       amw 	accessflags = 0;
    955      0    stevel 	if (filemode & FREAD)
    956      0    stevel 		mode |= VREAD;
    957      0    stevel 	if (filemode & (FWRITE|FTRUNC))
    958      0    stevel 		mode |= VWRITE;
    959   5331       amw 	if (filemode & FXATTRDIROPEN)
    960   5331       amw 		mode |= VEXEC;
    961      0    stevel 
    962      0    stevel 	/* symlink interpretation */
    963      0    stevel 	if (filemode & FNOFOLLOW)
    964      0    stevel 		follow = NO_FOLLOW;
    965      0    stevel 	else
    966      0    stevel 		follow = FOLLOW;
    967   5331       amw 
    968   5331       amw 	if (filemode & FAPPEND)
    969   5331       amw 		accessflags |= V_APPEND;
    970      0    stevel 
                         /* Restart point for the ESTALE retries below. */
    971      0    stevel top:
    972      0    stevel 	if (filemode & FCREAT) {
    973      0    stevel 		enum vcexcl excl;
    974      0    stevel 
    975      0    stevel 		/*
    976      0    stevel 		 * Wish to create a file.
    977      0    stevel 		 */
    978      0    stevel 		vattr.va_type = VREG;
    979      0    stevel 		vattr.va_mode = createmode;
    980      0    stevel 		vattr.va_mask = AT_TYPE|AT_MODE;
    981      0    stevel 		if (filemode & FTRUNC) {
    982      0    stevel 			vattr.va_size = 0;
    983      0    stevel 			vattr.va_mask |= AT_SIZE;
    984      0    stevel 		}
    985      0    stevel 		if (filemode & FEXCL)
    986      0    stevel 			excl = EXCL;
    987      0    stevel 		else
    988      0    stevel 			excl = NONEXCL;
    989      0    stevel 
                         		/* No vnode is held yet, so a failure returns directly. */
    990      0    stevel 		if (error =
    991      0    stevel 		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
    992   5050   jwahlig 		    (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
    993      0    stevel 			return (error);
    994      0    stevel 	} else {
    995      0    stevel 		/*
    996      0    stevel 		 * Wish to open a file.  Just look it up.
    997      0    stevel 		 */
    998      0    stevel 		if (error = lookupnameat(pnamep, seg, follow,
    999      0    stevel 		    NULLVPP, &vp, startvp)) {
   1000   2051  prabahar 			if ((error == ESTALE) &&
   1001   2051  prabahar 			    fs_need_estale_retry(estale_retry++))
   1002      0    stevel 				goto top;
   1003      0    stevel 			return (error);
   1004      0    stevel 		}
   1005      0    stevel 
   1006      0    stevel 		/*
   1007      0    stevel 		 * Get the attributes to check whether file is large.
   1008      0    stevel 		 * We do this only if the FOFFMAX flag is not set and
   1009      0    stevel 		 * only for regular files.
   1010      0    stevel 		 */
   1011      0    stevel 
   1012      0    stevel 		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
   1013      0    stevel 			vattr.va_mask = AT_SIZE;
   1014   5331       amw 			if ((error = VOP_GETATTR(vp, &vattr, 0,
   1015   5331       amw 			    CRED(), NULL))) {
   1016      0    stevel 				goto out;
   1017      0    stevel 			}
   1018      0    stevel 			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
   1019      0    stevel 				/*
   1020      0    stevel 				 * Large File API - regular open fails
   1021      0    stevel 				 * if FOFFMAX flag is not set in file mode
   1022      0    stevel 				 */
   1023      0    stevel 				error = EOVERFLOW;
   1024      0    stevel 				goto out;
   1025      0    stevel 			}
   1026      0    stevel 		}
   1027      0    stevel 		/*
   1028      0    stevel 		 * Can't write directories, active texts, or
   1029      0    stevel 		 * read-only filesystems.  Can't truncate files
   1030      0    stevel 		 * on which mandatory locking is in effect.
   1031      0    stevel 		 */
   1032      0    stevel 		if (filemode & (FWRITE|FTRUNC)) {
   1033      0    stevel 			/*
   1034      0    stevel 			 * Allow writable directory if VDIROPEN flag is set.
   1035      0    stevel 			 */
   1036      0    stevel 			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
   1037      0    stevel 				error = EISDIR;
   1038      0    stevel 				goto out;
   1039      0    stevel 			}
   1040      0    stevel 			if (ISROFILE(vp)) {
   1041      0    stevel 				error = EROFS;
   1042      0    stevel 				goto out;
   1043      0    stevel 			}
   1044      0    stevel 			/*
   1045   5331       amw 			 * Can't truncate files on which
   1046   5331       amw 			 * sysv mandatory locking is in effect.
   1047      0    stevel 			 */
   1048      0    stevel 			if (filemode & FTRUNC) {
   1049      0    stevel 				vnode_t *rvp;
   1050      0    stevel 
                         				/* Fall back to vp itself if there is no real vnode. */
   1051   5331       amw 				if (VOP_REALVP(vp, &rvp, NULL) != 0)
   1052      0    stevel 					rvp = vp;
   1053   5331       amw 				if (rvp->v_filocks != NULL) {
   1054      0    stevel 					vattr.va_mask = AT_MODE;
   1055   5331       amw 					if ((error = VOP_GETATTR(vp,
   1056   5331       amw 					    &vattr, 0, CRED(), NULL)) == 0 &&
   1057   5331       amw 					    MANDLOCK(vp, vattr.va_mode))
   1058      0    stevel 						error = EAGAIN;
   1059      0    stevel 				}
   1060      0    stevel 			}
   1061      0    stevel 			if (error)
   1062      0    stevel 				goto out;
   1063      0    stevel 		}
   1064      0    stevel 		/*
   1065      0    stevel 		 * Check permissions.
   1066      0    stevel 		 */
   1067   5331       amw 		if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
   1068      0    stevel 			goto out;
   1069      0    stevel 	}
   1070      0    stevel 
   1071      0    stevel 	/*
   1072      0    stevel 	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
   1073      0    stevel 	 */
   1074      0    stevel 	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
   1075   2712   nn35248 		error = ELOOP;
   1076      0    stevel 		goto out;
   1077      0    stevel 	}
   1078      0    stevel 	if (filemode & FNOLINKS) {
   1079      0    stevel 		vattr.va_mask = AT_NLINK;
   1080   5331       amw 		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
   1081      0    stevel 			goto out;
   1082      0    stevel 		}
   1083      0    stevel 		if (vattr.va_nlink != 1) {
   1084      0    stevel 			error = EMLINK;
   1085      0    stevel 			goto out;
   1086      0    stevel 		}
   1087      0    stevel 	}
   1088      0    stevel 
   1089      0    stevel 	/*
   1090      0    stevel 	 * Opening a socket corresponding to the AF_UNIX pathname
   1091      0    stevel 	 * in the filesystem name space is not supported.
   1092      0    stevel 	 * However, VSOCK nodes in namefs are supported in order
   1093      0    stevel 	 * to make fattach work for sockets.
   1094      0    stevel 	 *
   1095      0    stevel 	 * XXX This uses VOP_REALVP to distinguish between
   1096      0    stevel 	 * an unopened namefs node (where VOP_REALVP returns a
   1097      0    stevel 	 * different VSOCK vnode) and a VSOCK created by vn_create
   1098      0    stevel 	 * in some file system (where VOP_REALVP would never return
   1099      0    stevel 	 * a different vnode).
   1100      0    stevel 	 */
   1101      0    stevel 	if (vp->v_type == VSOCK) {
   1102      0    stevel 		struct vnode *nvp;
   1103      0    stevel 
   1104   5331       amw 		error = VOP_REALVP(vp, &nvp, NULL);
   1105      0    stevel 		if (error != 0 || nvp == NULL || nvp == vp ||
   1106      0    stevel 		    nvp->v_type != VSOCK) {
   1107      0    stevel 			error = EOPNOTSUPP;
   1108      0    stevel 			goto out;
   1109      0    stevel 		}
   1110      0    stevel 	}
   1111   5331       amw 
   1112   5331       amw 	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
   1113   5331       amw 		/* get share reservation */
   1114   5331       amw 		shr.s_access = 0;
   1115   5331       amw 		if (filemode & FWRITE)
   1116   5331       amw 			shr.s_access |= F_WRACC;
   1117   5331       amw 		if (filemode & FREAD)
   1118   5331       amw 			shr.s_access |= F_RDACC;
   1119   5331       amw 		shr.s_deny = 0;
   1120   5331       amw 		shr.s_sysid = 0;
   1121   5331       amw 		shr.s_pid = ttoproc(curthread)->p_pid;
   1122   5331       amw 		shr_own.sl_pid = shr.s_pid;
   1123   5331       amw 		shr_own.sl_id = fd;
   1124   5331       amw 		shr.s_own_len = sizeof (shr_own);
   1125   5331       amw 		shr.s_owner = (caddr_t)&shr_own;
   1126   5331       amw 		error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
   1127   5331       amw 		    NULL);
   1128   5331       amw 		if (error)
   1129   5331       amw 			goto out;
   1130   5331       amw 		shrlock_done = 1;
   1131   5331       amw 
   1132   5331       amw 		/* nbmand conflict check if truncating file */
   1133   5331       amw 		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
   1134   5331       amw 			nbl_start_crit(vp, RW_READER);
   1135   5331       amw 			in_crit = 1;
   1136   5331       amw 
   1137   5331       amw 			vattr.va_mask = AT_SIZE;
   1138   5331       amw 			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
   1139   5331       amw 				goto out;
   1140   5331       amw 			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
   1141   5331       amw 			    NULL)) {
   1142   5331       amw 				error = EACCES;
   1143   5331       amw 				goto out;
   1144   5331       amw 			}
   1145   5331       amw 		}
   1146   5331       amw 	}
   1147   5331       amw 
   1148      0    stevel 	/*
   1149      0    stevel 	 * Do opening protocol.
   1150      0    stevel 	 */
   1151   5331       amw 	error = VOP_OPEN(&vp, filemode, CRED(), NULL);
   1152   5331       amw 	if (error)
   1153   5331       amw 		goto out;
   1154   5331       amw 	open_done = 1;
   1155   5331       amw 
   1156      0    stevel 	/*
   1157      0    stevel 	 * Truncate if required.
                         	 * In the FCREAT case the size-0 request was already handed to
                         	 * vn_createat() through vattr above, so it is not repeated here.
   1158      0    stevel 	 */
   1159   5331       amw 	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
   1160      0    stevel 		vattr.va_size = 0;
   1161      0    stevel 		vattr.va_mask = AT_SIZE;
   1162      0    stevel 		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
   1163   5331       amw 			goto out;
   1164      0    stevel 	}
                         /* Common exit: leave the critical region, then undo on error. */
   1165      0    stevel out:
   1166      0    stevel 	ASSERT(vp->v_count > 0);
   1167      0    stevel 
   1168      0    stevel 	if (in_crit) {
   1169      0    stevel 		nbl_end_crit(vp);
   1170      0    stevel 		in_crit = 0;
   1171      0    stevel 	}
   1172      0    stevel 	if (error) {
                         		/*
                         		 * NOTE(review): shrlock_done is cleared below without an
                         		 * F_UNSHARE call once the vnode has been closed; presumably
                         		 * VOP_CLOSE() drops the share reservation itself -- confirm.
                         		 */
   1173   5331       amw 		if (open_done) {
   1174   5331       amw 			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
   1175   5331       amw 			    NULL);
   1176   5331       amw 			open_done = 0;
   1177   5331       amw 			shrlock_done = 0;
   1178   5331       amw 		}
   1179   5331       amw 		if (shrlock_done) {
   1180   5331       amw 			(void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
   1181   5331       amw 			    NULL);
   1182   5331       amw 			shrlock_done = 0;
   1183   5331       amw 		}
   1184   5331       amw 
   1185      0    stevel 		/*
   1186      0    stevel 		 * The following clause was added to handle a problem
   1187      0    stevel 		 * with NFS consistency.  It is possible that a lookup
   1188      0    stevel 		 * of the file to be opened succeeded, but the file
   1189      0    stevel 		 * itself doesn't actually exist on the server.  This
   1190      0    stevel 		 * is chiefly due to the DNLC containing an entry for
   1191      0    stevel 		 * the file which has been removed on the server.  In
   1192      0    stevel 		 * this case, we just start over.  If there was some
   1193      0    stevel 		 * other cause for the ESTALE error, then the lookup
   1194      0    stevel 		 * of the file will fail and the error will be returned
   1195      0    stevel 		 * above instead of looping around from here.
   1196      0    stevel 		 */
   1197      0    stevel 		VN_RELE(vp);
   1198   2051  prabahar 		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1199      0    stevel 			goto top;
   1200      0    stevel 	} else
   1201      0    stevel 		*vpp = vp;
   1202      0    stevel 	return (error);
   1203      0    stevel }
   1204      0    stevel 
   1205   5331       amw /*
   1206   5331       amw  * The following two accessor functions are for the NFSv4 server.  Since there
   1207   5331       amw  * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
   1208   5331       amw  * vnode open counts correct when a client "upgrades" an open or does an
   1209   5331       amw  * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
   1210   5331       amw  * open mode (add or subtract read or write), but also change the share/deny
   1211   5331       amw  * modes.  However, share reservations are not integrated with OPEN, yet, so
   1212   5331       amw  * we need to handle each separately.  These functions are cleaner than having
   1213   5331       amw  * the NFS server manipulate the counts directly, however, nobody else should
   1214   5331       amw  * use these functions.
   1215   5331       amw  */
   1216   5331       amw void
   1217   5331       amw vn_open_upgrade(
   1218   5331       amw 	vnode_t *vp,
   1219   5331       amw 	int filemode)
   1220   5331       amw {
   1221   5331       amw 	ASSERT(vp->v_type == VREG);
   1222   5331       amw 
   1223   5331       amw 	if (filemode & FREAD)
   1224   5331       amw 		atomic_add_32(&(vp->v_rdcnt), 1);
   1225   5331       amw 	if (filemode & FWRITE)
   1226   5331       amw 		atomic_add_32(&(vp->v_wrcnt), 1);
   1227   5331       amw 
   1228   5331       amw }
   1229   5331       amw 
   1230   5331       amw void
   1231   5331       amw vn_open_downgrade(
   1232   5331       amw 	vnode_t *vp,
   1233   5331       amw 	int filemode)
   1234   5331       amw {
   1235   5331       amw 	ASSERT(vp->v_type == VREG);
   1236   5331       amw 
   1237   5331       amw 	if (filemode & FREAD) {
   1238   5331       amw 		ASSERT(vp->v_rdcnt > 0);
   1239   5331       amw 		atomic_add_32(&(vp->v_rdcnt), -1);
   1240   5331       amw 	}
   1241   5331       amw 	if (filemode & FWRITE) {
   1242   5331       amw 		ASSERT(vp->v_wrcnt > 0);
   1243   5331       amw 		atomic_add_32(&(vp->v_wrcnt), -1);
   1244   5331       amw 	}
   1245   5331       amw 
   1246   5331       amw }
   1247   5331       amw 
   1248      0    stevel int
   1249      0    stevel vn_create(
   1250      0    stevel 	char *pnamep,
   1251      0    stevel 	enum uio_seg seg,
   1252      0    stevel 	struct vattr *vap,
   1253      0    stevel 	enum vcexcl excl,
   1254      0    stevel 	int mode,
   1255      0    stevel 	struct vnode **vpp,
   1256      0    stevel 	enum create why,
   1257      0    stevel 	int flag,
   1258      0    stevel 	mode_t umask)
   1259      0    stevel {
   1260   5331       amw 	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
   1261   5331       amw 	    umask, NULL));
   1262      0    stevel }
   1263      0    stevel 
   1264      0    stevel /*
   1265      0    stevel  * Create a vnode (makenode).
   1266      0    stevel  */
   1267      0    stevel int
   1268      0    stevel vn_createat(
   1269      0    stevel 	char *pnamep,
   1270      0    stevel 	enum uio_seg seg,
   1271      0    stevel 	struct vattr *vap,
   1272      0    stevel 	enum vcexcl excl,
   1273      0    stevel 	int mode,
   1274      0    stevel 	struct vnode **vpp,
   1275      0    stevel 	enum create why,
   1276      0    stevel 	int flag,
   1277      0    stevel 	mode_t umask,
   1278      0    stevel 	struct vnode *startvp)
   1279      0    stevel {
   1280      0    stevel 	struct vnode *dvp;	/* ptr to parent dir vnode */
   1281      0    stevel 	struct vnode *vp = NULL;
   1282      0    stevel 	struct pathname pn;
   1283      0    stevel 	int error;
   1284      0    stevel 	int in_crit = 0;
   1285      0    stevel 	struct vattr vattr;
   1286      0    stevel 	enum symfollow follow;
   1287   2051  prabahar 	int estale_retry = 0;
   1288      0    stevel 
   1289      0    stevel 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
   1290      0    stevel 
   1291      0    stevel 	/* symlink interpretation */
   1292      0    stevel 	if ((flag & FNOFOLLOW) || excl == EXCL)
   1293      0    stevel 		follow = NO_FOLLOW;
   1294      0    stevel 	else
   1295      0    stevel 		follow = FOLLOW;
   1296      0    stevel 	flag &= ~(FNOFOLLOW|FNOLINKS);
   1297      0    stevel 
   1298      0    stevel top:
   1299      0    stevel 	/*
   1300      0    stevel 	 * Lookup directory.
   1301      0    stevel 	 * If new object is a file, call lower level to create it.
   1302      0    stevel 	 * Note that it is up to the lower level to enforce exclusive
   1303      0    stevel 	 * creation, if the file is already there.
   1304      0    stevel 	 * This allows the lower level to do whatever
   1305      0    stevel 	 * locking or protocol that is needed to prevent races.
   1306      0    stevel 	 * If the new object is directory call lower level to make
   1307      0    stevel 	 * the new directory, with "." and "..".
   1308      0    stevel 	 */
   1309      0    stevel 	if (error = pn_get(pnamep, seg, &pn))
   1310      0    stevel 		return (error);
   1311      0    stevel 	if (audit_active)
   1312      0    stevel 		audit_vncreate_start();
   1313      0    stevel 	dvp = NULL;
   1314      0    stevel 	*vpp = NULL;
   1315      0    stevel 	/*
   1316      0    stevel 	 * lookup will find the parent directory for the vnode.
   1317      0    stevel 	 * When it is done the pn holds the name of the entry
   1318      0    stevel 	 * in the directory.
   1319      0    stevel 	 * If this is a non-exclusive create we also find the node itself.
   1320      0    stevel 	 */
   1321      0    stevel 	error = lookuppnat(&pn, NULL, follow, &dvp,
   1322      0    stevel 	    (excl == EXCL) ? NULLVPP : vpp, startvp);
   1323      0    stevel 	if (error) {
   1324      0    stevel 		pn_free(&pn);
   1325   2051  prabahar 		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1326      0    stevel 			goto top;
   1327      0    stevel 		if (why == CRMKDIR && error == EINVAL)
   1328      0    stevel 			error = EEXIST;		/* SVID */
   1329      0    stevel 		return (error);
   1330      0    stevel 	}
   1331      0    stevel 
   1332      0    stevel 	if (why != CRMKNOD)
   1333      0    stevel 		vap->va_mode &= ~VSVTX;
   1334      0    stevel 
   1335      0    stevel 	/*
   1336      0    stevel 	 * If default ACLs are defined for the directory don't apply the
   1337      0    stevel 	 * umask if umask is passed.
   1338      0    stevel 	 */
   1339      0    stevel 
   1340      0    stevel 	if (umask) {
   1341      0    stevel 
   1342      0    stevel 		vsecattr_t vsec;
   1343      0    stevel 
   1344      0    stevel 		vsec.vsa_aclcnt = 0;
   1345      0    stevel 		vsec.vsa_aclentp = NULL;
   1346      0    stevel 		vsec.vsa_dfaclcnt = 0;
   1347      0    stevel 		vsec.vsa_dfaclentp = NULL;
   1348      0    stevel 		vsec.vsa_mask = VSA_DFACLCNT;
   1349   5331       amw 		error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
   1350    789    ahrens 		/*
   1351    789    ahrens 		 * If error is ENOSYS then treat it as no error
   1352    789    ahrens 		 * Don't want to force all file systems to support
   1353    789    ahrens 		 * aclent_t style of ACL's.
   1354    789    ahrens 		 */
   1355    789    ahrens 		if (error == ENOSYS)
   1356    789    ahrens 			error = 0;
   1357    789    ahrens 		if (error) {
   1358      0    stevel 			if (*vpp != NULL)
   1359      0    stevel 				VN_RELE(*vpp);
   1360      0    stevel 			goto out;
   1361    789    ahrens 		} else {
   1362    789    ahrens 			/*
   1363    789    ahrens 			 * Apply the umask if no default ACLs.
   1364    789    ahrens 			 */
   1365    789    ahrens 			if (vsec.vsa_dfaclcnt == 0)
   1366    789    ahrens 				vap->va_mode &= ~umask;
   1367    789    ahrens 
   1368    789    ahrens 			/*
   1369    789    ahrens 			 * VOP_GETSECATTR() may have allocated memory for
   1370    789    ahrens 			 * ACLs we didn't request, so double-check and
   1371    789    ahrens 			 * free it if necessary.
   1372    789    ahrens 			 */
   1373    789    ahrens 			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
   1374    789    ahrens 				kmem_free((caddr_t)vsec.vsa_aclentp,
   1375    789    ahrens 				    vsec.vsa_aclcnt * sizeof (aclent_t));
   1376    789    ahrens 			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
   1377    789    ahrens 				kmem_free((caddr_t)vsec.vsa_dfaclentp,
   1378    789    ahrens 				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
   1379      0    stevel 		}
   1380      0    stevel 	}
   1381      0    stevel 
   1382      0    stevel 	/*
   1383      0    stevel 	 * In general we want to generate EROFS if the file system is
   1384      0    stevel 	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
   1385      0    stevel 	 * documents the open system call, and it says that O_CREAT has no
   1386      0    stevel 	 * effect if the file already exists.  Bug 1119649 states
   1387      0    stevel 	 * that open(path, O_CREAT, ...) fails when attempting to open an
   1388      0    stevel 	 * existing file on a read only file system.  Thus, the first part
   1389      0    stevel 	 * of the following if statement has 3 checks:
   1390      0    stevel 	 *	if the file exists &&
   1391      0    stevel 	 *		it is being open with write access &&
   1392      0    stevel 	 *		the file system is read only
   1393      0    stevel 	 *	then generate EROFS
   1394      0    stevel 	 */
   1395      0    stevel 	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
   1396      0    stevel 	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
   1397      0    stevel 		if (*vpp)
   1398      0    stevel 			VN_RELE(*vpp);
   1399      0    stevel 		error = EROFS;
   1400      0    stevel 	} else if (excl == NONEXCL && *vpp != NULL) {
   1401      0    stevel 		vnode_t *rvp;
   1402      0    stevel 
   1403      0    stevel 		/*
   1404      0    stevel 		 * File already exists.  If a mandatory lock has been
   1405      0    stevel 		 * applied, return error.
   1406      0    stevel 		 */
   1407      0    stevel 		vp = *vpp;
   1408   5331       amw 		if (VOP_REALVP(vp, &rvp, NULL) != 0)
   1409      0    stevel 			rvp = vp;
   1410      0    stevel 		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
   1411      0    stevel 			nbl_start_crit(vp, RW_READER);
   1412      0    stevel 			in_crit = 1;
   1413      0    stevel 		}
   1414      0    stevel 		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
   1415      0    stevel 			vattr.va_mask = AT_MODE|AT_SIZE;
   1416   5331       amw 			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
   1417      0    stevel 				goto out;
   1418      0    stevel 			}
   1419      0    stevel 			if (MANDLOCK(vp, vattr.va_mode)) {
   1420      0    stevel 				error = EAGAIN;
   1421      0    stevel 				goto out;
   1422      0    stevel 			}
   1423      0    stevel 			/*
   1424      0    stevel 			 * File cannot be truncated if non-blocking mandatory
   1425      0    stevel 			 * locks are currently on the file.
   1426      0    stevel 			 */
   1427      0    stevel 			if ((vap->va_mask & AT_SIZE) && in_crit) {
   1428      0    stevel 				u_offset_t offset;
   1429      0    stevel 				ssize_t length;
   1430      0    stevel 
   1431      0    stevel 				offset = vap->va_size > vattr.va_size ?
   1432   4956  pf199842 				    vattr.va_size : vap->va_size;
   1433      0    stevel 				length = vap->va_size > vattr.va_size ?
   1434   4956  pf199842 				    vap->va_size - vattr.va_size :
   1435   4956  pf199842 				    vattr.va_size - vap->va_size;
   1436      0    stevel 				if (nbl_conflict(vp, NBL_WRITE, offset,
   1437   5331       amw 				    length, 0, NULL)) {
   1438      0    stevel 					error = EACCES;
   1439      0    stevel 					goto out;
   1440      0    stevel 				}
   1441      0    stevel 			}
   1442      0    stevel 		}
   1443      0    stevel 
   1444      0    stevel 		/*
   1445      0    stevel 		 * If the file is the root of a VFS, we've crossed a
   1446      0    stevel 		 * mount point and the "containing" directory that we
   1447      0    stevel 		 * acquired above (dvp) is irrelevant because it's in
   1448      0    stevel 		 * a different file system.  We apply VOP_CREATE to the
   1449      0    stevel 		 * target itself instead of to the containing directory
   1450      0    stevel 		 * and supply a null path name to indicate (conventionally)
   1451      0    stevel 		 * the node itself as the "component" of interest.
   1452      0    stevel 		 *
   1453      0    stevel 		 * The intercession of the file system is necessary to
   1454      0    stevel 		 * ensure that the appropriate permission checks are
   1455      0    stevel 		 * done.
   1456      0    stevel 		 */
   1457      0    stevel 		if (vp->v_flag & VROOT) {
   1458      0    stevel 			ASSERT(why != CRMKDIR);
   1459   5331       amw 			error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
   1460   5331       amw 			    CRED(), flag, NULL, NULL);
   1461      0    stevel 			/*
   1462      0    stevel 			 * If the create succeeded, it will have created
   1463      0    stevel 			 * a new reference to the vnode.  Give up the
   1464      0    stevel 			 * original reference.  The assertion should not
   1465      0    stevel 			 * get triggered because NBMAND locks only apply to
   1466      0    stevel 			 * VREG files.  And if in_crit is non-zero for some
   1467      0    stevel 			 * reason, detect that here, rather than when we
   1468      0    stevel 			 * deference a null vp.
   1469      0    stevel 			 */
   1470      0    stevel 			ASSERT(in_crit == 0);
   1471      0    stevel 			VN_RELE(vp);
   1472      0    stevel 			vp = NULL;
   1473      0    stevel 			goto out;
   1474      0    stevel 		}
   1475      0    stevel 
   1476      0    stevel 		/*
   1477      0    stevel 		 * Large File API - non-large open (FOFFMAX flag not set)
   1478      0    stevel 		 * of regular file fails if the file size exceeds MAXOFF32_T.
   1479      0    stevel 		 */
   1480      0    stevel 		if (why != CRMKDIR &&
   1481      0    stevel 		    !(flag & FOFFMAX) &&
   1482      0    stevel 		    (vp->v_type == VREG)) {
   1483      0    stevel 			vattr.va_mask = AT_SIZE;
   1484   5331       amw 			if ((error = VOP_GETATTR(vp, &vattr, 0,
   1485   5331       amw 			    CRED(), NULL))) {
   1486      0    stevel 				goto out;
   1487      0    stevel 			}
   1488      0    stevel 			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
   1489      0    stevel 				error = EOVERFLOW;
   1490      0    stevel 				goto out;
   1491      0    stevel 			}
   1492      0    stevel 		}
   1493      0    stevel 	}
   1494      0    stevel 
   1495      0    stevel 	if (error == 0) {
   1496      0    stevel 		/*
   1497      0    stevel 		 * Call mkdir() if specified, otherwise create().
   1498      0    stevel 		 */
   1499      0    stevel 		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */
   1500      0    stevel 
   1501      0    stevel 		if (why == CRMKDIR)
   1502   5331       amw 			/*
   1503   5331       amw 			 * N.B., if vn_createat() ever requests
   1504   5331       amw 			 * case-insensitive behavior then it will need
   1505   5331       amw 			 * to be passed to VOP_MKDIR().  VOP_CREATE()
   1506   5331       amw 			 * will already get it via "flag"
   1507   5331       amw 			 */
   1508   5331       amw 			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
   1509   5331       amw 			    NULL, 0, NULL);
   1510      0    stevel 		else if (!must_be_dir)
   1511      0    stevel 			error = VOP_CREATE(dvp, pn.pn_path, vap,
   1512   5331       amw 			    excl, mode, vpp, CRED(), flag, NULL, NULL);
   1513      0    stevel 		else
   1514      0    stevel 			error = ENOTDIR;
   1515      0    stevel 	}
   1516      0    stevel 
   1517      0    stevel out:
   1518      0    stevel 
   1519      0    stevel 	if (audit_active)
   1520      0    stevel 		audit_vncreate_finish(*vpp, error);
   1521      0    stevel 	if (in_crit) {
   1522      0    stevel 		nbl_end_crit(vp);
   1523      0    stevel 		in_crit = 0;
   1524      0    stevel 	}
   1525      0    stevel 	if (vp != NULL) {
   1526      0    stevel 		VN_RELE(vp);
   1527      0    stevel 		vp = NULL;
   1528      0    stevel 	}
   1529      0    stevel 	pn_free(&pn);
   1530      0    stevel 	VN_RELE(dvp);
   1531      0    stevel 	/*
   1532      0    stevel 	 * The following clause was added to handle a problem
   1533      0    stevel 	 * with NFS consistency.  It is possible that a lookup
   1534      0    stevel 	 * of the file to be created succeeded, but the file
   1535      0    stevel 	 * itself doesn't actually exist on the server.  This
   1536      0    stevel 	 * is chiefly due to the DNLC containing an entry for
   1537      0    stevel 	 * the file which has been removed on the server.  In
   1538      0    stevel 	 * this case, we just start over.  If there was some
   1539      0    stevel 	 * other cause for the ESTALE error, then the lookup
   1540      0    stevel 	 * of the file will fail and the error will be returned
   1541      0    stevel 	 * above instead of looping around from here.
   1542      0    stevel 	 */
   1543   2051  prabahar 	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1544      0    stevel 		goto top;
   1545      0    stevel 	return (error);
   1546      0    stevel }
   1547      0    stevel 
   1548      0    stevel int
   1549      0    stevel vn_link(char *from, char *to, enum uio_seg seg)
   1550      0    stevel {
   1551      0    stevel 	struct vnode *fvp;		/* from vnode ptr */
   1552      0    stevel 	struct vnode *tdvp;		/* to directory vnode ptr */
   1553      0    stevel 	struct pathname pn;
   1554      0    stevel 	int error;
   1555      0    stevel 	struct vattr vattr;
   1556      0    stevel 	dev_t fsid;
   1557   2051  prabahar 	int estale_retry = 0;
   1558      0    stevel 
   1559      0    stevel top:
   1560      0    stevel 	fvp = tdvp = NULL;
   1561      0    stevel 	if (error = pn_get(to, seg, &pn))
   1562      0    stevel 		return (error);
   1563      0    stevel 	if (error = lookupname(from, seg, NO_FOLLOW, NULLVPP, &fvp))
   1564      0    stevel 		goto out;
   1565      0    stevel 	if (error = lookuppn(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP))
   1566      0    stevel 		goto out;
   1567      0    stevel 	/*
   1568      0    stevel 	 * Make sure both source vnode and target directory vnode are
   1569      0    stevel 	 * in the same vfs and that it is writeable.
   1570      0    stevel 	 */
   1571      0    stevel 	vattr.va_mask = AT_FSID;
   1572   5331       amw 	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
   1573      0    stevel 		goto out;
   1574      0    stevel 	fsid = vattr.va_fsid;
   1575      0    stevel 	vattr.va_mask = AT_FSID;
   1576   5331       amw 	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
   1577      0    stevel 		goto out;
   1578      0    stevel 	if (fsid != vattr.va_fsid) {
   1579      0    stevel 		error = EXDEV;
   1580      0    stevel 		goto out;
   1581      0    stevel 	}
   1582      0    stevel 	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
   1583      0    stevel 		error = EROFS;
   1584      0    stevel 		goto out;
   1585      0    stevel 	}
   1586      0    stevel 	/*
   1587      0    stevel 	 * Do the link.
   1588      0    stevel 	 */
   1589      0    stevel 	(void) pn_fixslash(&pn);
   1590   5331       amw 	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
   1591      0    stevel out:
   1592      0    stevel 	pn_free(&pn);
   1593      0    stevel 	if (fvp)
   1594      0    stevel 		VN_RELE(fvp);
   1595      0    stevel 	if (tdvp)
   1596      0    stevel 		VN_RELE(tdvp);
   1597   2051  prabahar 	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1598      0    stevel 		goto top;
   1599      0    stevel 	return (error);
   1600      0    stevel }
   1601      0    stevel 
   1602      0    stevel int
   1603      0    stevel vn_rename(char *from, char *to, enum uio_seg seg)
   1604      0    stevel {
   1605      0    stevel 	return (vn_renameat(NULL, from, NULL, to, seg));
   1606      0    stevel }
   1607      0    stevel 
   1608      0    stevel int
   1609      0    stevel vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
   1610      0    stevel 		char *tname, enum uio_seg seg)
   1611      0    stevel {
   1612      0    stevel 	int error;
   1613      0    stevel 	struct vattr vattr;
   1614      0    stevel 	struct pathname fpn;		/* from pathname */
   1615      0    stevel 	struct pathname tpn;		/* to pathname */
   1616      0    stevel 	dev_t fsid;
   1617   5331       amw 	int in_crit_src, in_crit_targ;
   1618      0    stevel 	vnode_t *fromvp, *fvp;
   1619   5331       amw 	vnode_t *tovp, *targvp;
   1620   5331       amw 	int estale_retry = 0;
   1621   5331       amw 
   1622   5331       amw top:
   1623   5331       amw 	fvp = fromvp = tovp = targvp = NULL;
   1624   5331       amw 	in_crit_src = in_crit_targ = 0;
   1625      0    stevel 	/*
   1626      0    stevel 	 * Get to and from pathnames.
   1627      0    stevel 	 */
   1628      0    stevel 	if (error = pn_get(fname, seg, &fpn))
   1629      0    stevel 		return (error);
   1630      0    stevel 	if (error = pn_get(tname, seg, &tpn)) {
   1631      0    stevel 		pn_free(&fpn);
   1632      0    stevel 		return (error);
   1633      0    stevel 	}
   1634      0    stevel 
   1635      0    stevel 	/*
   1636      0    stevel 	 * First we need to resolve the correct directories
   1637      0    stevel 	 * The passed in directories may only be a starting point,
   1638      0    stevel 	 * but we need the real directories the file(s) live in.
   1639      0    stevel 	 * For example the fname may be something like usr/lib/sparc
   1640      0    stevel 	 * and we were passed in the / directory, but we need to
   1641      0    stevel 	 * use the lib directory for the rename.
   1642      0    stevel 	 */
   1643      0    stevel 
   1644      0    stevel 	if (audit_active)
   1645      0    stevel 		audit_setfsat_path(1);
   1646      0    stevel 	/*
   1647      0    stevel 	 * Lookup to and from directories.
   1648      0    stevel 	 */
   1649      0    stevel 	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
   1650      0    stevel 		goto out;
   1651      0    stevel 	}
   1652      0    stevel 
   1653      0    stevel 	/*
   1654      0    stevel 	 * Make sure there is an entry.
   1655      0    stevel 	 */
   1656      0    stevel 	if (fvp == NULL) {
   1657      0    stevel 		error = ENOENT;
   1658      0    stevel 		goto out;
   1659      0    stevel 	}
   1660      0    stevel 
   1661      0    stevel 	if (audit_active)
   1662      0    stevel 		audit_setfsat_path(3);
   1663   5331       amw 	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
   1664      0    stevel 		goto out;
   1665      0    stevel 	}
   1666      0    stevel 
   1667      0    stevel 	/*
   1668      0    stevel 	 * Make sure both the from vnode directory and the to directory
   1669      0    stevel 	 * are in the same vfs and the to directory is writable.
   1670      0    stevel 	 * We check fsid's, not vfs pointers, so loopback fs works.
   1671      0    stevel 	 */
   1672      0    stevel 	if (fromvp != tovp) {
   1673      0    stevel 		vattr.va_mask = AT_FSID;
   1674   5331       amw 		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
   1675      0    stevel 			goto out;
   1676      0    stevel 		fsid = vattr.va_fsid;
   1677      0    stevel 		vattr.va_mask = AT_FSID;
   1678   5331       amw 		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
   1679      0    stevel 			goto out;
   1680      0    stevel 		if (fsid != vattr.va_fsid) {
   1681      0    stevel 			error = EXDEV;
   1682      0    stevel 			goto out;
   1683      0    stevel 		}
   1684      0    stevel 	}
   1685      0    stevel 
   1686      0    stevel 	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
   1687      0    stevel 		error = EROFS;
   1688      0    stevel 		goto out;
   1689      0    stevel 	}
   1690      0    stevel 
   1691   5331       amw 	if (targvp && (fvp != targvp)) {
   1692   5331       amw 		nbl_start_crit(targvp, RW_READER);
   1693   5331       amw 		in_crit_targ = 1;
   1694   5331       amw 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
   1695   5331       amw 			error = EACCES;
   1696   5331       amw 			goto out;
   1697   5331       amw 		}
   1698   5331       amw 	}
   1699   5331       amw 
   1700      0    stevel 	if (nbl_need_check(fvp)) {
   1701      0    stevel 		nbl_start_crit(fvp, RW_READER);
   1702   5331       amw 		in_crit_src = 1;
   1703   5331       amw 		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
   1704      0    stevel 			error = EACCES;
   1705      0    stevel 			goto out;
   1706      0    stevel 		}
   1707      0    stevel 	}
   1708      0    stevel 
   1709      0    stevel 	/*
   1710      0    stevel 	 * Do the rename.
   1711      0    stevel 	 */
   1712      0    stevel 	(void) pn_fixslash(&tpn);
   1713   5331       amw 	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
   1714   5331       amw 	    NULL, 0);
   1715      0    stevel 
   1716      0    stevel out:
   1717      0    stevel 	pn_free(&fpn);
   1718      0    stevel 	pn_free(&tpn);
   1719   5331       amw 	if (in_crit_src)
   1720      0    stevel 		nbl_end_crit(fvp);
   1721   5331       amw 	if (in_crit_targ)
   1722   5331       amw 		nbl_end_crit(targvp);
   1723      0    stevel 	if (fromvp)
   1724      0    stevel 		VN_RELE(fromvp);
   1725      0    stevel 	if (tovp)
   1726      0    stevel 		VN_RELE(tovp);
   1727   5331       amw 	if (targvp)
   1728   5331       amw 		VN_RELE(targvp);
   1729      0    stevel 	if (fvp)
   1730      0    stevel 		VN_RELE(fvp);
   1731   2051  prabahar 	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1732      0    stevel 		goto top;
   1733      0    stevel 	return (error);
   1734      0    stevel }
   1735      0    stevel 
   1736      0    stevel /*
   1737      0    stevel  * Remove a file or directory.
   1738      0    stevel  */
   1739      0    stevel int
   1740      0    stevel vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
   1741      0    stevel {
   1742      0    stevel 	return (vn_removeat(NULL, fnamep, seg, dirflag));
   1743      0    stevel }
   1744      0    stevel 
   1745      0    stevel int
   1746      0    stevel vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
   1747      0    stevel {
   1748      0    stevel 	struct vnode *vp;		/* entry vnode */
   1749      0    stevel 	struct vnode *dvp;		/* ptr to parent dir vnode */
   1750      0    stevel 	struct vnode *coveredvp;
   1751      0    stevel 	struct pathname pn;		/* name of entry */
   1752      0    stevel 	enum vtype vtype;
   1753      0    stevel 	int error;
   1754      0    stevel 	struct vfs *vfsp;
   1755      0    stevel 	struct vfs *dvfsp;	/* ptr to parent dir vfs */
   1756      0    stevel 	int in_crit = 0;
   1757   2051  prabahar 	int estale_retry = 0;
   1758      0    stevel 
   1759      0    stevel top:
   1760      0    stevel 	if (error = pn_get(fnamep, seg, &pn))
   1761      0    stevel 		return (error);
   1762      0    stevel 	dvp = vp = NULL;
   1763      0    stevel 	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
   1764      0    stevel 		pn_free(&pn);
   1765   2051  prabahar 		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1766      0    stevel 			goto top;
   1767      0    stevel 		return (error);
   1768      0    stevel 	}
   1769      0    stevel 
   1770      0    stevel 	/*
   1771      0    stevel 	 * Make sure there is an entry.
   1772      0    stevel 	 */
   1773      0    stevel 	if (vp == NULL) {
   1774      0    stevel 		error = ENOENT;
   1775      0    stevel 		goto out;
   1776      0    stevel 	}
   1777      0    stevel 
   1778      0    stevel 	vfsp = vp->v_vfsp;
   1779      0    stevel 	dvfsp = dvp->v_vfsp;
   1780      0    stevel 
   1781      0    stevel 	/*
   1782      0    stevel 	 * If the named file is the root of a mounted filesystem, fail,
   1783      0    stevel 	 * unless it's marked unlinkable.  In that case, unmount the
   1784      0    stevel 	 * filesystem and proceed to unlink the covered vnode.  (If the
   1785      0    stevel 	 * covered vnode is a directory, use rmdir instead of unlink,
   1786      0    stevel 	 * to avoid file system corruption.)
   1787      0    stevel 	 */
   1788      0    stevel 	if (vp->v_flag & VROOT) {
   1789   4956  pf199842 		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
   1790   4956  pf199842 			error = EBUSY;
   1791   4956  pf199842 			goto out;
   1792   4956  pf199842 		}
   1793   4956  pf199842 
   1794   4956  pf199842 		/*
   1795   4956  pf199842 		 * Namefs specific code starts here.
   1796   4956  pf199842 		 */
   1797   4956  pf199842 
   1798   4956  pf199842 		if (dirflag == RMDIRECTORY) {
   1799      0    stevel 			/*
   1800   4956  pf199842 			 * User called rmdir(2) on a file that has
   1801   4956  pf199842 			 * been namefs mounted on top of.  Since
   1802   4956  pf199842 			 * namefs doesn't allow directories to
   1803   4956  pf199842 			 * be mounted on other files we know
   1804   4956  pf199842 			 * vp is not of type VDIR so fail to operation.
   1805      0    stevel 			 */
   1806   4956  pf199842 			error = ENOTDIR;
   1807   4956  pf199842 			goto out;
   1808   4956  pf199842 		}
   1809   4956  pf199842 
   1810   4956  pf199842 		/*
   1811   4956  pf199842 		 * If VROOT is still set after grabbing vp->v_lock,
   1812   4956  pf199842 		 * noone has finished nm_unmount so far and coveredvp
   1813   4956  pf199842 		 * is valid.
   1814   4956  pf199842 		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
   1815   4956  pf199842 		 * vp->v_lock, any race window is eliminated.
   1816   4956  pf199842 		 */
   1817   4956  pf199842 
   1818   4956  pf199842 		mutex_enter(&vp->v_lock);
   1819   4956  pf199842 		if ((vp->v_flag & VROOT) == 0) {
   1820   4956  pf199842 			/* Someone beat us to the unmount */
   1821   4956  pf199842 			mutex_exit(&vp->v_lock);
   1822      0    stevel 			error = EBUSY;
   1823   4956  pf199842 			goto out;
   1824   4956  pf199842 		}
   1825   4956  pf199842 		vfsp = vp->v_vfsp;
   1826   4956  pf199842 		coveredvp = vfsp->vfs_vnodecovered;
   1827   4956  pf199842 		ASSERT(coveredvp);
   1828   4956  pf199842 		/*
   1829   4956  pf199842 		 * Note: Implementation of vn_vfswlock shows that ordering of
   1830   4956  pf199842 		 * v_lock / vn_vfswlock is not an issue here.
   1831   4956  pf199842 		 */
   1832   4956  pf199842 		error = vn_vfswlock(coveredvp);
   1833   4956  pf199842 		mutex_exit(&vp->v_lock);
   1834   4956  pf199842 
   1835   4956  pf199842 		if (error)
   1836   4956  pf199842 			goto out;
   1837   4956  pf199842 
   1838   4956  pf199842 		VN_HOLD(coveredvp);
   1839   4956  pf199842 		VN_RELE(vp);
   1840   4956  pf199842 		error = dounmount(vfsp, 0, CRED());
   1841   4956  pf199842 
   1842   4956  pf199842 		/*
   1843   4956  pf199842 		 * Unmounted the namefs file system; now get
   1844   4956  pf199842 		 * the object it was mounted over.
   1845   4956  pf199842 		 */
   1846   4956  pf199842 		vp = coveredvp;
   1847   4956  pf199842 		/*
   1848   4956  pf199842 		 * If namefs was mounted over a directory, then
   1849   4956  pf199842 		 * we want to use rmdir() instead of unlink().
   1850   4956  pf199842 		 */
   1851   4956  pf199842 		if (vp->v_type == VDIR)
   1852   4956  pf199842 			dirflag = RMDIRECTORY;
   1853      0    stevel 
   1854      0    stevel 		if (error)
   1855      0    stevel 			goto out;
   1856      0    stevel 	}
   1857      0    stevel 
   1858      0    stevel 	/*
   1859      0    stevel 	 * Make sure filesystem is writeable.
   1860      0    stevel 	 * We check the parent directory's vfs in case this is an lofs vnode.
   1861      0    stevel 	 */
   1862      0    stevel 	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
   1863      0    stevel 		error = EROFS;
   1864      0    stevel 		goto out;
   1865      0    stevel 	}
   1866      0    stevel 
   1867      0    stevel 	vtype = vp->v_type;
   1868      0    stevel 
   1869      0    stevel 	/*
   1870      0    stevel 	 * If there is the possibility of an nbmand share reservation, make
   1871      0    stevel 	 * sure it's okay to remove the file.  Keep a reference to the
   1872      0    stevel 	 * vnode, so that we can exit the nbl critical region after
   1873      0    stevel 	 * calling VOP_REMOVE.
   1874      0    stevel 	 * If there is no possibility of an nbmand share reservation,
   1875      0    stevel 	 * release the vnode reference now.  Filesystems like NFS may
   1876      0    stevel 	 * behave differently if there is an extra reference, so get rid of
   1877      0    stevel 	 * this one.  Fortunately, we can't have nbmand mounts on NFS
   1878      0    stevel 	 * filesystems.
   1879      0    stevel 	 */
   1880      0    stevel 	if (nbl_need_check(vp)) {
   1881      0    stevel 		nbl_start_crit(vp, RW_READER);
   1882      0    stevel 		in_crit = 1;
   1883   5331       amw 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
   1884      0    stevel 			error = EACCES;
   1885      0    stevel 			goto out;
   1886      0    stevel 		}
   1887      0    stevel 	} else {
   1888      0    stevel 		VN_RELE(vp);
   1889      0    stevel 		vp = NULL;
   1890      0    stevel 	}
   1891      0    stevel 
   1892      0    stevel 	if (dirflag == RMDIRECTORY) {
   1893      0    stevel 		/*
   1894      0    stevel 		 * Caller is using rmdir(2), which can only be applied to
   1895      0    stevel 		 * directories.
   1896      0    stevel 		 */
   1897      0    stevel 		if (vtype != VDIR) {
   1898      0    stevel 			error = ENOTDIR;
   1899      0    stevel 		} else {
   1900      0    stevel 			vnode_t *cwd;
   1901      0    stevel 			proc_t *pp = curproc;
   1902      0    stevel 
   1903      0    stevel 			mutex_enter(&pp->p_lock);
   1904      0    stevel 			cwd = PTOU(pp)->u_cdir;
   1905      0    stevel 			VN_HOLD(cwd);
   1906      0    stevel 			mutex_exit(&pp->p_lock);
   1907   5331       amw 			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
   1908   5331       amw 			    NULL, 0);
   1909      0    stevel 			VN_RELE(cwd);
   1910      0    stevel 		}
   1911      0    stevel 	} else {
   1912      0    stevel 		/*
   1913      0    stevel 		 * Unlink(2) can be applied to anything.
   1914      0    stevel 		 */
   1915   5331       amw 		error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
   1916      0    stevel 	}
   1917      0    stevel 
   1918      0    stevel out:
   1919      0    stevel 	pn_free(&pn);
   1920      0    stevel 	if (in_crit) {
   1921      0    stevel 		nbl_end_crit(vp);
   1922      0    stevel 		in_crit = 0;
   1923      0    stevel 	}
   1924      0    stevel 	if (vp != NULL)
   1925      0    stevel 		VN_RELE(vp);
   1926      0    stevel 	if (dvp != NULL)
   1927      0    stevel 		VN_RELE(dvp);
   1928   2051  prabahar 	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
   1929      0    stevel 		goto top;
   1930      0    stevel 	return (error);
   1931      0    stevel }
   1932      0    stevel 
   1933      0    stevel /*
   1934      0    stevel  * Utility function to compare equality of vnodes.
   1935      0    stevel  * Compare the underlying real vnodes, if there are underlying vnodes.
   1936      0    stevel  * This is a more thorough comparison than the VN_CMP() macro provides.
   1937      0    stevel  */
   1938      0    stevel int
   1939      0    stevel vn_compare(vnode_t *vp1, vnode_t *vp2)
   1940      0    stevel {
   1941      0    stevel 	vnode_t *realvp;
   1942      0    stevel 
   1943   5331       amw 	if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
   1944      0    stevel 		vp1 = realvp;
   1945   5331       amw 	if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
   1946      0    stevel 		vp2 = realvp;
   1947      0    stevel 	return (VN_CMP(vp1, vp2));
   1948      0    stevel }
   1949      0    stevel 
   1950      0    stevel /*
   1951      0    stevel  * The number of locks to hash into.  This value must be a power
   1952      0    stevel  * of 2 minus 1 and should probably also be prime.
   1953      0    stevel  */
   1954      0    stevel #define	NUM_BUCKETS	1023
   1955      0    stevel 
   1956      0    stevel struct  vn_vfslocks_bucket {
   1957      0    stevel 	kmutex_t vb_lock;
   1958      0    stevel 	vn_vfslocks_entry_t *vb_list;
   1959      0    stevel 	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
   1960      0    stevel };
   1961      0    stevel 
   1962      0    stevel /*
   1963      0    stevel  * Total number of buckets will be NUM_BUCKETS + 1 .
   1964      0    stevel  */
   1965      0    stevel 
   1966      0    stevel #pragma	align	64(vn_vfslocks_buckets)
   1967      0    stevel static	struct vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];
   1968      0    stevel 
   1969      0    stevel #define	VN_VFSLOCKS_SHIFT	9
   1970      0    stevel 
   1971      0    stevel #define	VN_VFSLOCKS_HASH(vfsvpptr)	\
   1972      0    stevel 	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
   1973      0    stevel 
   1974      0    stevel /*
   1975      0    stevel  * vn_vfslocks_getlock() uses an HASH scheme to generate
   1976      0    stevel  * rwstlock using vfs/vnode pointer passed to it.
   1977      0    stevel  *
   1978      0    stevel  * vn_vfslocks_rele() releases a reference in the
   1979      0    stevel  * HASH table which allows the entry allocated by
   1980      0    stevel  * vn_vfslocks_getlock() to be freed at a later
   1981      0    stevel  * stage when the refcount drops to zero.
   1982      0    stevel  */
   1983      0    stevel 
   1984      0    stevel vn_vfslocks_entry_t *
   1985      0    stevel vn_vfslocks_getlock(void *vfsvpptr)
   1986      0    stevel {
   1987      0    stevel 	struct vn_vfslocks_bucket *bp;
   1988      0    stevel 	vn_vfslocks_entry_t *vep;
   1989      0    stevel 	vn_vfslocks_entry_t *tvep;
   1990      0    stevel 
   1991      0    stevel 	ASSERT(vfsvpptr != NULL);
   1992      0    stevel 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
   1993      0    stevel 
   1994      0    stevel 	mutex_enter(&bp->vb_lock);
   1995      0    stevel 	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
   1996      0    stevel 		if (vep->ve_vpvfs == vfsvpptr) {
   1997      0    stevel 			vep->ve_refcnt++;
   1998      0    stevel 			mutex_exit(&bp->vb_lock);
   1999      0    stevel 			return (vep);
   2000      0    stevel 		}
   2001      0    stevel 	}
   2002      0    stevel 	mutex_exit(&bp->vb_lock);
   2003      0    stevel 	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
   2004      0    stevel 	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
   2005      0    stevel 	vep->ve_vpvfs = (char *)vfsvpptr;
   2006      0    stevel 	vep->ve_refcnt = 1;
   2007      0    stevel 	mutex_enter(&bp->vb_lock);
   2008      0    stevel 	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
   2009      0    stevel 		if (tvep->ve_vpvfs == vfsvpptr) {
   2010      0    stevel 			tvep->ve_refcnt++;
   2011      0    stevel 			mutex_exit(&bp->vb_lock);
   2012      0    stevel 
   2013      0    stevel 			/*
   2014      0    stevel 			 * There is already an entry in the hash
   2015      0    stevel 			 * destroy what we just allocated.
   2016      0    stevel 			 */
   2017      0    stevel 			rwst_destroy(&vep->ve_lock);
   2018      0    stevel 			kmem_free(vep, sizeof (*vep));
   2019      0    stevel 			return (tvep);
   2020      0    stevel 		}
   2021      0    stevel 	}
   2022      0    stevel 	vep->ve_next = bp->vb_list;
   2023      0    stevel 	bp->vb_list = vep;
   2024      0    stevel 	mutex_exit(&bp->vb_lock);
   2025      0    stevel 	return (vep);
   2026      0    stevel }
   2027      0    stevel 
   2028      0    stevel void
   2029      0    stevel vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
   2030      0    stevel {
   2031      0    stevel 	struct vn_vfslocks_bucket *bp;
   2032      0    stevel 	vn_vfslocks_entry_t *vep;
   2033      0    stevel 	vn_vfslocks_entry_t *pvep;
   2034      0    stevel 
   2035      0    stevel 	ASSERT(vepent != NULL);
   2036      0    stevel 	ASSERT(vepent->ve_vpvfs != NULL);
   2037      0    stevel 
   2038      0    stevel 	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
   2039      0    stevel 
   2040      0    stevel 	mutex_enter(&bp->vb_lock);
   2041      0    stevel 	vepent->ve_refcnt--;
   2042      0    stevel 
   2043      0    stevel 	if ((int32_t)vepent->ve_refcnt < 0)
   2044      0    stevel 		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
   2045      0    stevel 
   2046      0    stevel 	if (vepent->ve_refcnt == 0) {
   2047      0    stevel 		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
   2048      0    stevel 			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
   2049      0    stevel 				if (bp->vb_list == vep)
   2050      0    stevel 					bp->vb_list = vep->ve_next;
   2051      0    stevel 				else {
   2052      0    stevel 					/* LINTED */
   2053      0    stevel 					pvep->ve_next = vep->ve_next;
   2054      0    stevel 				}
   2055      0    stevel 				mutex_exit(&bp->vb_lock);
   2056      0    stevel 				rwst_destroy(&vep->ve_lock);
   2057      0    stevel 				kmem_free(vep, sizeof (*vep));
   2058      0    stevel 				return;
   2059      0    stevel 			}
   2060      0    stevel 			pvep = vep;
   2061      0    stevel 		}
   2062      0    stevel 		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
   2063      0    stevel 	}
   2064      0    stevel 	mutex_exit(&bp->vb_lock);
   2065      0    stevel }
   2066      0    stevel 
   2067      0    stevel /*
   2068      0    stevel  * vn_vfswlock_wait is used to implement a lock which is logically a writers
   2069      0    stevel  * lock protecting the v_vfsmountedhere field.
   2070      0    stevel  * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
   2071      0    stevel  * except that it blocks to acquire the lock VVFSLOCK.
   2072      0    stevel  *
   2073      0    stevel  * traverse() and routines re-implementing part of traverse (e.g. autofs)
   2074      0    stevel  * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
   2075      0    stevel  * need the non-blocking version of the writers lock i.e. vn_vfswlock
   2076      0    stevel  */
   2077      0    stevel int
   2078      0    stevel vn_vfswlock_wait(vnode_t *vp)
   2079      0    stevel {
   2080      0    stevel 	int retval;
   2081      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2082      0    stevel 	ASSERT(vp != NULL);
   2083      0    stevel 
   2084      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2085      0    stevel 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
   2086      0    stevel 
   2087      0    stevel 	if (retval == EINTR) {
   2088      0    stevel 		vn_vfslocks_rele(vpvfsentry);
   2089      0    stevel 		return (EINTR);
   2090      0    stevel 	}
   2091      0    stevel 	return (retval);
   2092      0    stevel }
   2093      0    stevel 
   2094      0    stevel int
   2095      0    stevel vn_vfsrlock_wait(vnode_t *vp)
   2096      0    stevel {
   2097      0    stevel 	int retval;
   2098      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2099      0    stevel 	ASSERT(vp != NULL);
   2100      0    stevel 
   2101      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2102      0    stevel 	retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
   2103      0    stevel 
   2104      0    stevel 	if (retval == EINTR) {
   2105      0    stevel 		vn_vfslocks_rele(vpvfsentry);
   2106      0    stevel 		return (EINTR);
   2107      0    stevel 	}
   2108      0    stevel 
   2109      0    stevel 	return (retval);
   2110      0    stevel }
   2111      0    stevel 
   2112      0    stevel 
   2113      0    stevel /*
   2114      0    stevel  * vn_vfswlock is used to implement a lock which is logically a writers lock
   2115      0    stevel  * protecting the v_vfsmountedhere field.
   2116      0    stevel  */
   2117      0    stevel int
   2118      0    stevel vn_vfswlock(vnode_t *vp)
   2119      0    stevel {
   2120      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2121      0    stevel 
   2122      0    stevel 	/*
   2123      0    stevel 	 * If vp is NULL then somebody is trying to lock the covered vnode
   2124      0    stevel 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
   2125      0    stevel 	 * only happen when unmounting /.  Since that operation will fail
   2126      0    stevel 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
   2127      0    stevel 	 */
   2128      0    stevel 	if (vp == NULL)
   2129      0    stevel 		return (EBUSY);
   2130      0    stevel 
   2131      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2132      0    stevel 
   2133      0    stevel 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
   2134      0    stevel 		return (0);
   2135      0    stevel 
   2136      0    stevel 	vn_vfslocks_rele(vpvfsentry);
   2137      0    stevel 	return (EBUSY);
   2138      0    stevel }
   2139      0    stevel 
   2140      0    stevel int
   2141      0    stevel vn_vfsrlock(vnode_t *vp)
   2142      0    stevel {
   2143      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2144      0    stevel 
   2145      0    stevel 	/*
   2146      0    stevel 	 * If vp is NULL then somebody is trying to lock the covered vnode
   2147      0    stevel 	 * of /.  (vfs_vnodecovered is NULL for /).  This situation will
   2148      0    stevel 	 * only happen when unmounting /.  Since that operation will fail
   2149      0    stevel 	 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
   2150      0    stevel 	 */
   2151      0    stevel 	if (vp == NULL)
   2152      0    stevel 		return (EBUSY);
   2153      0    stevel 
   2154      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2155      0    stevel 
   2156      0    stevel 	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
   2157      0    stevel 		return (0);
   2158      0    stevel 
   2159      0    stevel 	vn_vfslocks_rele(vpvfsentry);
   2160      0    stevel 	return (EBUSY);
   2161      0    stevel }
   2162      0    stevel 
   2163      0    stevel void
   2164      0    stevel vn_vfsunlock(vnode_t *vp)
   2165      0    stevel {
   2166      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2167      0    stevel 
   2168      0    stevel 	/*
   2169      0    stevel 	 * ve_refcnt needs to be decremented twice.
   2170      0    stevel 	 * 1. To release refernce after a call to vn_vfslocks_getlock()
   2171      0    stevel 	 * 2. To release the reference from the locking routines like
   2172      0    stevel 	 *    vn_vfsrlock/vn_vfswlock etc,.
   2173      0    stevel 	 */
   2174      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2175      0    stevel 	vn_vfslocks_rele(vpvfsentry);
   2176      0    stevel 
   2177      0    stevel 	rwst_exit(&vpvfsentry->ve_lock);
   2178      0    stevel 	vn_vfslocks_rele(vpvfsentry);
   2179      0    stevel }
   2180      0    stevel 
   2181      0    stevel int
   2182      0    stevel vn_vfswlock_held(vnode_t *vp)
   2183      0    stevel {
   2184      0    stevel 	int held;
   2185      0    stevel 	vn_vfslocks_entry_t *vpvfsentry;
   2186      0    stevel 
   2187      0    stevel 	ASSERT(vp != NULL);
   2188      0    stevel 
   2189      0    stevel 	vpvfsentry = vn_vfslocks_getlock(vp);
   2190      0    stevel 	held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
   2191      0    stevel 
   2192      0    stevel 	vn_vfslocks_rele(vpvfsentry);
   2193      0    stevel 	return (held);
   2194      0    stevel }
   2195      0    stevel 
   2196      0    stevel 
   2197      0    stevel int
   2198      0    stevel vn_make_ops(
   2199      0    stevel 	const char *name,			/* Name of file system */
   2200      0    stevel 	const fs_operation_def_t *templ,	/* Operation specification */
   2201      0    stevel 	vnodeops_t **actual)			/* Return the vnodeops */
   2202      0    stevel {
   2203      0    stevel 	int unused_ops;
   2204      0    stevel 	int error;
   2205      0    stevel 
   2206      0    stevel 	*actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
   2207      0    stevel 
   2208      0    stevel 	(*actual)->vnop_name = name;
   2209      0    stevel 
   2210      0    stevel 	error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
   2211      0    stevel 	if (error) {
   2212      0    stevel 		kmem_free(*actual, sizeof (vnodeops_t));
   2213      0    stevel 	}
   2214      0    stevel 
   2215      0    stevel #if DEBUG
   2216      0    stevel 	if (unused_ops != 0)
   2217      0    stevel 		cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
   2218      0    stevel 		    "but not used", name, unused_ops);
   2219      0    stevel #endif
   2220      0    stevel 
   2221      0    stevel 	return (error);
   2222      0    stevel }
   2223      0    stevel 
   2224      0    stevel /*
   2225      0    stevel  * Free the vnodeops created as a result of vn_make_ops()
   2226      0    stevel  */
   2227      0    stevel void
   2228      0    stevel vn_freevnodeops(vnodeops_t *vnops)
   2229      0    stevel {
   2230      0    stevel 	kmem_free(vnops, sizeof (vnodeops_t));
   2231      0    stevel }
   2232      0    stevel 
   2233      0    stevel /*
   2234      0    stevel  * Vnode cache.
   2235      0    stevel  */
   2236      0    stevel 
   2237      0    stevel /* ARGSUSED */
   2238      0    stevel static int
   2239      0    stevel vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
   2240      0    stevel {
   2241      0    stevel 	struct vnode *vp;
   2242      0    stevel 
   2243      0    stevel 	vp = buf;
   2244      0    stevel 
   2245      0    stevel 	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
   2246   9885    Robert 	mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
   2247      0    stevel 	cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
   2248      0    stevel 	rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
   2249      0    stevel 	vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
   2250      0    stevel 	vp->v_path = NULL;
   2251      0    stevel 	vp->v_mpssdata = NULL;
   2252   5050   jwahlig 	vp->v_vsd = NULL;
   2253   4863     praks 	vp->v_fopdata = NULL;
   2254      0    stevel 
   2255      0    stevel 	return (0);
   2256      0    stevel }
   2257      0    stevel 
   2258      0    stevel /* ARGSUSED */
   2259      0    stevel static void
   2260      0    stevel vn_cache_destructor(void *buf, void *cdrarg)
   2261      0    stevel {
   2262      0    stevel 	struct vnode *vp;
   2263      0    stevel 
   2264      0    stevel 	vp = buf;
   2265      0    stevel 
   2266      0    stevel 	rw_destroy(&vp->v_nbllock);
   2267      0    stevel 	cv_destroy(&vp->v_cv);
   2268   9885    Robert 	mutex_destroy(&vp->v_vsd_lock);
   2269      0    stevel 	mutex_destroy(&vp->v_lock);
   2270      0    stevel }
   2271      0    stevel 
   2272      0    stevel void
   2273      0    stevel vn_create_cache(void)
   2274      0    stevel {
   2275      0    stevel 	vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), 64,
   2276      0    stevel 	    vn_cache_constructor, vn_cache_destructor, NULL, NULL,
   2277      0    stevel 	    NULL, 0);
   2278      0    stevel }
   2279      0    stevel 
   2280      0    stevel void
   2281      0    stevel vn_destroy_cache(void)
   2282      0    stevel {
   2283      0    stevel 	kmem_cache_destroy(vn_cache);
   2284      0    stevel }
   2285      0    stevel 
   2286      0    stevel /*
   2287      0    stevel  * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
   2288      0    stevel  * cached by the file system and vnodes remain associated.
   2289      0    stevel  */
   2290      0    stevel void
   2291      0    stevel vn_recycle(vnode_t *vp)
   2292      0    stevel {
   2293      0    stevel 	ASSERT(vp->v_pages == NULL);
   2294      0    stevel 
   2295      0    stevel 	/*
   2296      0    stevel 	 * XXX - This really belongs in vn_reinit(), but we have some issues
   2297      0    stevel 	 * with the counts.  Best to have it here for clean initialization.
   2298      0    stevel 	 */
   2299      0    stevel 	vp->v_rdcnt = 0;
   2300      0    stevel 	vp->v_wrcnt = 0;
   2301      0    stevel 	vp->v_mmap_read = 0;
   2302      0    stevel 	vp->v_mmap_write = 0;
   2303      0    stevel 
   2304      0    stevel 	/*
   2305      0    stevel 	 * If FEM was in use, make sure everything gets cleaned up
   2306      0    stevel 	 * NOTE: vp->v_femhead is initialized to NULL in the vnode
   2307      0    stevel 	 * constructor.
   2308      0    stevel 	 */
   2309      0    stevel 	if (vp->v_femhead) {
   2310      0    stevel 		/* XXX - There should be a free_femhead() that does all this */
   2311      0    stevel 		ASSERT(vp->v_femhead->femh_list == NULL);
   2312      0    stevel 		mutex_destroy(&vp->v_femhead->femh_lock);
   2313      0    stevel 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
   2314      0    stevel 		vp->v_femhead = NULL;
   2315      0    stevel 	}
   2316      0    stevel 	if (vp->v_path) {
   2317      0    stevel 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
   2318      0    stevel 		vp->v_path = NULL;
   2319      0    stevel 	}
   2320   4863     praks 
   2321   4863     praks 	if (vp->v_fopdata != NULL) {
   2322   4863     praks 		free_fopdata(vp);
   2323   4863     praks 	}
   2324      0    stevel 	vp->v_mpssdata = NULL;
   2325   5050   jwahlig 	vsd_free(vp);
   2326      0    stevel }
   2327      0    stevel 
   2328      0    stevel /*
   2329      0    stevel  * Used to reset the vnode fields including those that are directly accessible
   2330      0    stevel  * as well as those which require an accessor function.
   2331      0    stevel  *
   2332      0    stevel  * Does not initialize:
   2333   9885    Robert  *	synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
   2334      0    stevel  *	v_data (since FS-nodes and vnodes point to each other and should
   2335      0    stevel  *		be updated simultaneously)
   2336      0    stevel  *	v_op (in case someone needs to make a VOP call on this object)
   2337      0    stevel  */
   2338      0    stevel void
   2339      0    stevel vn_reinit(vnode_t *vp)
   2340      0    stevel {
   2341      0    stevel 	vp->v_count = 1;
   2342   6712     tomee 	vp->v_count_dnlc = 0;
   2343      0    stevel 	vp->v_vfsp = NULL;
   2344      0    stevel 	vp->v_stream = NULL;
   2345      0    stevel 	vp->v_vfsmountedhere = NULL;
   2346      0    stevel 	vp->v_flag = 0;
   2347      0    stevel 	vp->v_type = VNON;
   2348      0    stevel 	vp->v_rdev = NODEV;
   2349      0    stevel 
   2350      0    stevel 	vp->v_filocks = NULL;
   2351      0    stevel 	vp->v_shrlocks = NULL;
   2352      0    stevel 	vp->v_pages = NULL;
   2353      0    stevel 
   2354      0    stevel 	vp->v_locality = NULL;
   2355   5331       amw 	vp->v_xattrdir = NULL;
   2356      0    stevel 
   2357      0    stevel 	/* Handles v_femhead, v_path, and the r/w/map counts */
   2358      0    stevel 	vn_recycle(vp);
   2359      0    stevel }
   2360      0    stevel 
   2361      0    stevel vnode_t *
   2362      0    stevel vn_alloc(int kmflag)
   2363      0    stevel {
   2364      0    stevel 	vnode_t *vp;
   2365      0    stevel 
   2366      0    stevel 	vp = kmem_cache_alloc(vn_cache, kmflag);
   2367      0    stevel 
   2368      0    stevel 	if (vp != NULL) {
   2369      0    stevel 		vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
   2370   4863     praks 		vp->v_fopdata = NULL;
   2371      0    stevel 		vn_reinit(vp);
   2372      0    stevel 	}
   2373      0    stevel 
   2374      0    stevel 	return (vp);
   2375      0    stevel }
   2376      0    stevel 
   2377      0    stevel void
   2378      0    stevel vn_free(vnode_t *vp)
   2379      0    stevel {
   2380   5331       amw 	ASSERT(vp->v_shrlocks == NULL);
   2381   5331       amw 	ASSERT(vp->v_filocks == NULL);
   2382   5331       amw 
   2383      0    stevel 	/*
   2384      0    stevel 	 * Some file systems call vn_free() with v_count of zero,
   2385      0    stevel 	 * some with v_count of 1.  In any case, the value should
   2386      0    stevel 	 * never be anything else.
   2387      0    stevel 	 */
   2388      0    stevel 	ASSERT((vp->v_count == 0) || (vp->v_count == 1));
   2389   6712     tomee 	ASSERT(vp->v_count_dnlc == 0);
   2390      0    stevel 	if (vp->v_path != NULL) {
   2391      0    stevel 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
   2392      0    stevel 		vp->v_path = NULL;
   2393      0    stevel 	}
   2394      0    stevel 
   2395      0    stevel 	/* If FEM was in use, make sure everything gets cleaned up */
   2396      0    stevel 	if (vp->v_femhead) {
   2397      0    stevel 		/* XXX - There should be a free_femhead() that does all this */
   2398      0    stevel 		ASSERT(vp->v_femhead->femh_list == NULL);
   2399      0    stevel 		mutex_destroy(&vp->v_femhead->femh_lock);
   2400      0    stevel 		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
   2401      0    stevel 		vp->v_femhead = NULL;
   2402   4863     praks 	}
   2403   4863     praks 
   2404   4863     praks 	if (vp->v_fopdata != NULL) {
   2405   4863     praks 		free_fopdata(vp);
   2406      0    stevel 	}
   2407      0    stevel 	vp->v_mpssdata = NULL;
   2408   5050   jwahlig 	vsd_free(vp);
   2409      0    stevel 	kmem_cache_free(vn_cache, vp);
   2410      0    stevel }
   2411      0    stevel 
   2412      0    stevel /*
   2413      0    stevel  * vnode status changes, should define better states than 1, 0.
   2414      0    stevel  */
   2415      0    stevel void
   2416      0    stevel vn_reclaim(vnode_t *vp)
   2417      0    stevel {
   2418      0    stevel 	vfs_t   *vfsp = vp->v_vfsp;
   2419      0    stevel 
   2420   1925       rsb 	if (vfsp == NULL ||
   2421   1925       rsb 	    vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
   2422      0    stevel 		return;
   2423      0    stevel 	}
   2424      0    stevel 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
   2425      0    stevel }
   2426      0    stevel 
   2427      0    stevel void
   2428      0    stevel vn_idle(vnode_t *vp)
   2429      0    stevel {
   2430      0    stevel 	vfs_t   *vfsp = vp->v_vfsp;
   2431      0    stevel 
   2432   1925       rsb 	if (vfsp == NULL ||
   2433   1925       rsb 	    vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
   2434      0    stevel 		return;
   2435      0    stevel 	}
   2436      0    stevel 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
   2437      0    stevel }
   2438      0    stevel void
   2439      0    stevel vn_exists(vnode_t *vp)
   2440      0    stevel {
   2441      0    stevel 	vfs_t   *vfsp = vp->v_vfsp;
   2442      0    stevel 
   2443   1925       rsb 	if (vfsp == NULL ||
   2444   1925       rsb 	    vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
   2445      0    stevel 		return;
   2446      0    stevel 	}
   2447      0    stevel 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
   2448      0    stevel }
   2449      0    stevel 
   2450      0    stevel void
   2451      0    stevel vn_invalid(vnode_t *vp)
   2452      0    stevel {
   2453      0    stevel 	vfs_t   *vfsp = vp->v_vfsp;
   2454      0    stevel 
   2455   1925       rsb 	if (vfsp == NULL ||
   2456   1925       rsb 	    vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
   2457      0    stevel 		return;
   2458      0    stevel 	}
   2459      0    stevel 	(void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
   2460      0    stevel }
   2461      0    stevel 
   2462      0    stevel /* Vnode event notification */
   2463      0    stevel 
   2464      0    stevel int
   2465   5331       amw vnevent_support(vnode_t *vp, caller_context_t *ct)
   2466      0    stevel {
   2467      0    stevel 	if (vp == NULL)
   2468      0    stevel 		return (EINVAL);
   2469      0    stevel 
   2470   5331       amw 	return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
   2471   5331       amw }
   2472   5331       amw 
   2473   5331       amw void
   2474   5331       amw vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
   2475   5331       amw {
   2476   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2477   5331       amw 		return;
   2478   5331       amw 	}
   2479   5331       amw 	(void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
   2480   5331       amw }
   2481   5331       amw 
   2482   5331       amw void
   2483   5331       amw vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
   2484   5331       amw     caller_context_t *ct)
   2485   5331       amw {
   2486   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2487   5331       amw 		return;
   2488   5331       amw 	}
   2489   5331       amw 	(void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
   2490   5331       amw }
   2491   5331       amw 
   2492   5331       amw void
   2493   5331       amw vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
   2494   5331       amw {
   2495   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2496   5331       amw 		return;
   2497   5331       amw 	}
   2498   5331       amw 	(void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
   2499   5331       amw }
   2500   5331       amw 
   2501   5331       amw void
   2502   5331       amw vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
   2503   5331       amw {
   2504   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2505   5331       amw 		return;
   2506   5331       amw 	}
   2507   5331       amw 	(void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
   2508   5331       amw }
   2509   5331       amw 
   2510   5331       amw void
   2511   5331       amw vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
   2512   5331       amw {
   2513   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2514   5331       amw 		return;
   2515   5331       amw 	}
   2516   5331       amw 	(void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
   2517   5331       amw }
   2518   5331       amw 
   2519   5331       amw void
   2520   5331       amw vnevent_create(vnode_t *vp, caller_context_t *ct)
   2521   5331       amw {
   2522   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2523   5331       amw 		return;
   2524   5331       amw 	}
   2525   5331       amw 	(void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
   2526   5331       amw }
   2527   5331       amw 
   2528   5331       amw void
   2529   5331       amw vnevent_link(vnode_t *vp, caller_context_t *ct)
   2530   5331       amw {
   2531   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2532   5331       amw 		return;
   2533   5331       amw 	}
   2534   5331       amw 	(void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
   2535   5331       amw }
   2536   5331       amw 
   2537   5331       amw void
   2538   5331       amw vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
   2539   5331       amw {
   2540   5331       amw 	if (vp == NULL || vp->v_femhead == NULL) {
   2541   5331       amw 		return;
   2542   5331       amw 	}
   2543   5331       amw 	(void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
   2544      0    stevel }
   2545      0    stevel 
   2546      0    stevel /*
   2547      0    stevel  * Vnode accessors.
   2548      0    stevel  */
   2549      0    stevel 
   2550      0    stevel int
   2551      0    stevel vn_is_readonly(vnode_t *vp)
   2552      0    stevel {
   2553      0    stevel 	return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
   2554      0    stevel }
   2555      0    stevel 
   2556      0    stevel int
   2557      0    stevel vn_has_flocks(vnode_t *vp)
   2558      0    stevel {
   2559      0    stevel 	return (vp->v_filocks != NULL);
   2560      0    stevel }
   2561      0    stevel 
   2562      0    stevel int
   2563      0    stevel vn_has_mandatory_locks(vnode_t *vp, int mode)
   2564      0    stevel {
   2565      0    stevel 	return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
   2566      0    stevel }
   2567      0    stevel 
   2568      0    stevel int
   2569      0    stevel vn_has_cached_data(vnode_t *vp)
   2570      0    stevel {
   2571      0    stevel 	return (vp->v_pages != NULL);
   2572      0    stevel }
   2573      0    stevel 
   2574      0    stevel /*
   2575      0    stevel  * Return 0 if the vnode in question shouldn't be permitted into a zone via
   2576      0    stevel  * zone_enter(2).
   2577      0    stevel  */
   2578      0    stevel int
   2579      0    stevel vn_can_change_zones(vnode_t *vp)
   2580      0    stevel {
   2581      0    stevel 	struct vfssw *vswp;
   2582      0    stevel 	int allow = 1;
   2583      0    stevel 	vnode_t *rvp;
   2584      0    stevel 
   2585    766  carlsonj 	if (nfs_global_client_only != 0)
   2586    766  carlsonj 		return (1);
   2587    766  carlsonj 
   2588      0    stevel 	/*
   2589      0    stevel 	 * We always want to look at the underlying vnode if there is one.
   2590      0    stevel 	 */
   2591   5331       amw 	if (VOP_REALVP(vp, &rvp, NULL) != 0)
   2592      0    stevel 		rvp = vp;
   2593      0    stevel 	/*
   2594      0    stevel 	 * Some pseudo filesystems (including doorfs) don't actually register
   2595      0    stevel 	 * their vfsops_t, so the following may return NULL; we happily let
   2596      0    stevel 	 * such vnodes switch zones.
   2597      0    stevel 	 */
   2598      0    stevel 	vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
   2599      0    stevel 	if (vswp != NULL) {
   2600      0    stevel 		if (vswp->vsw_flag & VSW_NOTZONESAFE)
   2601      0    stevel 			allow = 0;
   2602      0    stevel 		vfs_unrefvfssw(vswp);
   2603      0    stevel 	}
   2604      0    stevel 	return (allow);
   2605      0    stevel }
   2606      0    stevel 
   2607      0    stevel /*
   2608      0    stevel  * Return nonzero if the vnode is a mount point, zero if not.
   2609      0    stevel  */
   2610      0    stevel int
   2611      0    stevel vn_ismntpt(vnode_t *vp)
   2612      0    stevel {
   2613      0    stevel 	return (vp->v_vfsmountedhere != NULL);
   2614      0    stevel }
   2615      0    stevel 
   2616      0    stevel /* Retrieve the vfs (if any) mounted on this vnode */
   2617      0    stevel vfs_t *
   2618      0    stevel vn_mountedvfs(vnode_t *vp)
   2619      0    stevel {
   2620      0    stevel 	return (vp->v_vfsmountedhere);
   2621   6712     tomee }
   2622   6712     tomee 
   2623   6712     tomee /*
   2624   6712     tomee  * Return nonzero if the vnode is referenced by the dnlc, zero if not.
   2625   6712     tomee  */
   2626   6712     tomee int
   2627   6712     tomee vn_in_dnlc(vnode_t *vp)
   2628   6712     tomee {
   2629   6712     tomee 	return (vp->v_count_dnlc > 0);
   2630   5331       amw }
   2631   5331       amw 
   2632   5331       amw /*
   2633   5331       amw  * vn_has_other_opens() checks whether a particular file is opened by more than
   2634   5331       amw  * just the caller and whether the open is for read and/or write.
   2635   5331       amw  * This routine is for calling after the caller has already called VOP_OPEN()
   2636   5331       amw  * and the caller wishes to know if they are the only one with it open for
   2637   5331       amw  * the mode(s) specified.
   2638   5331       amw  *
   2639   5331       amw  * Vnode counts are only kept on regular files (v_type=VREG).
   2640   5331       amw  */
   2641   5331       amw int
   2642   5331       amw vn_has_other_opens(
   2643   5331       amw 	vnode_t *vp,
   2644   5331       amw 	v_mode_t mode)
   2645   5331       amw {
   2646   5331       amw 
   2647   5331       amw 	ASSERT(vp != NULL);
   2648   5331       amw 
   2649   5331       amw 	switch (mode) {
   2650   5331       amw 	case V_WRITE:
   2651   5331       amw 		if (vp->v_wrcnt > 1)
   2652   5331       amw 			return (V_TRUE);
   2653   5331       amw 		break;
   2654   5331       amw 	case V_RDORWR:
   2655   5331       amw 		if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
   2656   5331       amw 			return (V_TRUE);
   2657   5331       amw 		break;
   2658   5331       amw 	case V_RDANDWR:
   2659   5331       amw 		if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
   2660   5331       amw 			return (V_TRUE);
   2661   5331       amw 		break;
   2662   5331       amw 	case V_READ:
   2663   5331       amw 		if (vp->v_rdcnt > 1)
   2664   5331       amw 			return (V_TRUE);
   2665   5331       amw 		break;
   2666   5331       amw 	}
   2667   5331       amw 
   2668   5331       amw 	return (V_FALSE);
   2669      0    stevel }
   2670      0    stevel 
   2671      0    stevel /*
   2672      0    stevel  * vn_is_opened() checks whether a particular file is opened and
   2673      0    stevel  * whether the open is for read and/or write.
   2674      0    stevel  *
   2675      0    stevel  * Vnode counts are only kept on regular files (v_type=VREG).
   2676      0    stevel  */
   2677      0    stevel int
   2678      0    stevel vn_is_opened(
   2679      0    stevel 	vnode_t *vp,
   2680      0    stevel 	v_mode_t mode)
   2681      0    stevel {
   2682      0    stevel 
   2683      0    stevel 	ASSERT(vp != NULL);
   2684      0    stevel 
   2685      0    stevel 	switch (mode) {
   2686      0    stevel 	case V_WRITE:
   2687      0    stevel 		if (vp->v_wrcnt)
   2688      0    stevel 			return (V_TRUE);
   2689      0    stevel 		break;
   2690      0    stevel 	case V_RDANDWR:
   2691      0    stevel 		if (vp->v_rdcnt && vp->v_wrcnt)
   2692      0    stevel 			return (V_TRUE);
   2693      0    stevel 		break;
   2694      0    stevel 	case V_RDORWR:
   2695      0    stevel 		if (vp->v_rdcnt || vp->v_wrcnt)
   2696      0    stevel 			return (V_TRUE);
   2697      0    stevel 		break;
   2698      0    stevel 	case V_READ:
   2699      0    stevel 		if (vp->v_rdcnt)
   2700      0    stevel 			return (V_TRUE);
   2701      0    stevel 		break;
   2702      0    stevel 	}
   2703      0    stevel 
   2704      0    stevel 	return (V_FALSE);
   2705      0    stevel }
   2706      0    stevel 
   2707      0    stevel /*
   2708      0    stevel  * vn_is_mapped() checks whether a particular file is mapped and whether
   2709      0    stevel  * the file is mapped read and/or write.
   2710      0    stevel  */
   2711      0    stevel int
   2712      0    stevel vn_is_mapped(
   2713      0    stevel 	vnode_t *vp,
   2714      0    stevel 	v_mode_t mode)
   2715      0    stevel {
   2716      0    stevel 
   2717      0    stevel 	ASSERT(vp != NULL);
   2718      0    stevel 
   2719      0    stevel #if !defined(_LP64)
   2720      0    stevel 	switch (mode) {
   2721      0    stevel 	/*
   2722      0    stevel 	 * The atomic_add_64_nv functions force atomicity in the
   2723      0    stevel 	 * case of 32 bit architectures. Otherwise the 64 bit values
   2724      0    stevel 	 * require two fetches. The value of the fields may be
   2725      0    stevel 	 * (potentially) changed between the first fetch and the
   2726      0    stevel 	 * second
   2727      0    stevel 	 */
   2728      0    stevel 	case V_WRITE:
   2729      0    stevel 		if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
   2730      0    stevel 			return (V_TRUE);
   2731      0    stevel 		break;
   2732      0    stevel 	case V_RDANDWR:
   2733      0    stevel 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
   2734      0    stevel 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
   2735      0    stevel 			return (V_TRUE);
   2736      0    stevel 		break;
   2737      0    stevel 	case V_RDORWR:
   2738      0    stevel 		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
   2739      0    stevel 		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
   2740      0    stevel 			return (V_TRUE);
   2741      0    stevel 		break;
   2742      0    stevel 	case V_READ:
   2743      0    stevel 		if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
   2744      0    stevel 			return (V_TRUE);
   2745      0    stevel 		break;
   2746      0    stevel 	}
   2747      0    stevel #else
   2748      0    stevel 	switch (mode) {
   2749      0    stevel 	case V_WRITE:
   2750      0    stevel 		if (vp->v_mmap_write)
   2751      0    stevel 			return (V_TRUE);
   2752      0    stevel 		break;
   2753      0    stevel 	case V_RDANDWR:
   2754      0    stevel 		if (vp->v_mmap_read && vp->v_mmap_write)
   2755      0    stevel 			return (V_TRUE);
   2756      0    stevel 		break;
   2757      0    stevel 	case V_RDORWR:
   2758      0    stevel 		if (vp->v_mmap_read || vp->v_mmap_write)
   2759      0    stevel 			return (V_TRUE);
   2760      0    stevel 		break;
   2761      0    stevel 	case V_READ:
   2762      0    stevel 		if (vp->v_mmap_read)
   2763      0    stevel 			return (V_TRUE);
   2764      0    stevel 		break;
   2765      0    stevel 	}
   2766      0    stevel #endif
   2767      0    stevel 
   2768      0    stevel 	return (V_FALSE);
   2769      0    stevel }
   2770      0    stevel 
   2771      0    stevel /*
   2772      0    stevel  * Set the operations vector for a vnode.
   2773      0    stevel  *
   2774      0    stevel  * FEM ensures that the v_femhead pointer is filled in before the
   2775      0    stevel  * v_op pointer is changed.  This means that if the v_femhead pointer
   2776      0    stevel  * is NULL, and the v_op field hasn't changed since before which checked
   2777      0    stevel  * the v_femhead pointer; then our update is ok - we are not racing with
   2778      0    stevel  * FEM.
   2779      0    stevel  */
   2780      0    stevel void
   2781      0    stevel vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
   2782      0    stevel {
   2783      0    stevel 	vnodeops_t	*op;
   2784      0    stevel 
   2785      0    stevel 	ASSERT(vp != NULL);
   2786      0    stevel 	ASSERT(vnodeops != NULL);
   2787      0    stevel 
   2788      0    stevel 	op = vp->v_op;
   2789      0    stevel 	membar_consumer();
   2790      0    stevel 	/*
   2791      0    stevel 	 * If vp->v_femhead == NULL, then we'll call casptr() to do the
   2792      0    stevel 	 * compare-and-swap on vp->v_op.  If either fails, then FEM is
   2793      0    stevel 	 * in effect on the vnode and we need to have FEM deal with it.
   2794      0    stevel 	 */
   2795      0    stevel 	if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
   2796      0    stevel 		fem_setvnops(vp, vnodeops);
   2797      0    stevel 	}
   2798      0    stevel }
   2799      0    stevel 
   2800      0    stevel /*
   2801      0    stevel  * Retrieve the operations vector for a vnode
   2802      0    stevel  * As with vn_setops(above); make sure we aren't racing with FEM.
   2803      0    stevel  * FEM sets the v_op to a special, internal, vnodeops that wouldn't
   2804      0    stevel  * make sense to the callers of this routine.
   2805      0    stevel  */
   2806      0    stevel vnodeops_t *
   2807      0    stevel vn_getops(vnode_t *vp)
   2808      0    stevel {
   2809      0    stevel 	vnodeops_t	*op;
   2810      0    stevel 
   2811      0    stevel 	ASSERT(vp != NULL);
   2812      0    stevel 
   2813      0    stevel 	op = vp->v_op;
   2814      0    stevel 	membar_consumer();
   2815      0    stevel 	if (vp->v_femhead == NULL && op == vp->v_op) {
   2816      0    stevel 		return (op);
   2817      0    stevel 	} else {
   2818      0    stevel 		return (fem_getvnops(vp));
   2819      0    stevel 	}
   2820      0    stevel }
   2821      0    stevel 
   2822      0    stevel /*
   2823      0    stevel  * Returns non-zero (1) if the vnodeops matches that of the vnode.
   2824      0    stevel  * Returns zero (0) if not.
   2825      0    stevel  */
   2826      0    stevel int
   2827      0    stevel vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
   2828      0    stevel {
   2829      0    stevel 	return (vn_getops(vp) == vnodeops);
   2830      0    stevel }
   2831      0    stevel 
   2832      0    stevel /*
   2833      0    stevel  * Returns non-zero (1) if the specified operation matches the
   2834      0    stevel  * corresponding operation for that the vnode.
   2835      0    stevel  * Returns zero (0) if not.
   2836      0    stevel  */
   2837      0    stevel 
   2838      0    stevel #define	MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
   2839      0    stevel 
   2840      0    stevel int
   2841      0    stevel vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
   2842      0    stevel {
   2843      0    stevel 	const fs_operation_trans_def_t *otdp;
   2844      0    stevel 	fs_generic_func_p *loc = NULL;
   2845      0    stevel 	vnodeops_t	*vop = vn_getops(vp);
   2846      0    stevel 
   2847      0    stevel 	ASSERT(vopname != NULL);
   2848      0    stevel 
   2849      0    stevel 	for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
   2850      0    stevel 		if (MATCHNAME(otdp->name, vopname)) {
   2851   4956  pf199842 			loc = (fs_generic_func_p *)
   2852   4956  pf199842 			    ((char *)(vop) + otdp->offset);
   2853      0    stevel 			break;
   2854      0    stevel 		}
   2855      0    stevel 	}
   2856      0    stevel 
   2857      0    stevel 	return ((loc != NULL) && (*loc == funcp));
   2858      0    stevel }
   2859      0    stevel 
   2860      0    stevel /*
   2861      0    stevel  * fs_new_caller_id() needs to return a unique ID on a given local system.
   2862      0    stevel  * The IDs do not need to survive across reboots.  These are primarily
   2863      0    stevel  * used so that (FEM) monitors can detect particular callers (such as
   2864      0    stevel  * the NFS server) to a given vnode/vfs operation.
   2865      0    stevel  */
   2866      0    stevel u_longlong_t
   2867      0    stevel fs_new_caller_id()
   2868      0    stevel {
   2869      0    stevel 	static uint64_t next_caller_id = 0LL; /* First call returns 1 */
   2870      0    stevel 
   2871      0    stevel 	return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
   2872      0    stevel }
   2873      0    stevel 
   2874      0    stevel /*
   2875      0    stevel  * Given a starting vnode and a path, updates the path in the target vnode in
   2876      0    stevel  * a safe manner.  If the vnode already has path information embedded, then the
   2877    254  eschrock  * cached path is left untouched.
   2878      0    stevel  */
   2879   3855  sn199410 
   2880   3855  sn199410 size_t max_vnode_path = 4 * MAXPATHLEN;
   2881   3855  sn199410 
   2882      0    stevel void
   2883      0    stevel vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
   2884      0    stevel     const char *path, size_t plen)
   2885      0    stevel {
   2886      0    stevel 	char	*rpath;
   2887      0    stevel 	vnode_t	*base;
   2888      0    stevel 	size_t	rpathlen, rpathalloc;
   2889      0    stevel 	int	doslash = 1;
   2890      0    stevel 
   2891      0    stevel 	if (*path == '/') {
   2892      0    stevel 		base = rootvp;
   2893      0    stevel 		path++;
   2894      0    stevel 		plen--;
   2895      0    stevel 	} else {
   2896      0    stevel 		base = startvp;
   2897      0    stevel 	}
   2898      0    stevel 
   2899      0    stevel 	/*
   2900      0    stevel 	 * We cannot grab base->v_lock while we hold vp->v_lock because of
   2901      0    stevel 	 * the potential for deadlock.
   2902      0    stevel 	 */
   2903      0    stevel 	mutex_enter(&base->v_lock);
   2904      0    stevel 	if (base->v_path == NULL) {
   2905      0    stevel 		mutex_exit(&base->v_lock);
   2906      0    stevel 		return;
   2907      0    stevel 	}
   2908      0    stevel 
   2909      0    stevel 	rpathlen = strlen(base->v_path);
   2910      0    stevel 	rpathalloc = rpathlen + plen + 1;
   2911      0    stevel 	/* Avoid adding a slash if there's already one there */
   2912      0    stevel 	if (base->v_path[rpathlen-1] == '/')
   2913      0    stevel 		doslash = 0;
   2914      0    stevel 	else
   2915      0    stevel 		rpathalloc++;
   2916      0    stevel 
   2917      0    stevel 	/*
   2918      0    stevel 	 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
   2919      0    stevel 	 * so we must do this dance.  If, by chance, something changes the path,
   2920      0    stevel 	 * just give up since there is no real harm.
   2921      0    stevel 	 */
   2922      0    stevel 	mutex_exit(&base->v_lock);
   2923   3855  sn199410 
   2924   3855  sn199410 	/* Paths should stay within reason */
   2925   3855  sn199410 	if (rpathalloc > max_vnode_path)
   2926   3855  sn199410 		return;
   2927      0    stevel 
   2928      0    stevel 	rpath = kmem_alloc(rpathalloc, KM_SLEEP);
   2929      0    stevel 
   2930      0    stevel 	mutex_enter(&base->v_lock);
   2931      0    stevel 	if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
   2932      0    stevel 		mutex_exit(&base->v_lock);
   2933      0    stevel 		kmem_free(rpath, rpathalloc);
   2934      0    stevel 		return;
   2935      0    stevel 	}
   2936      0    stevel 	bcopy(base->v_path, rpath, rpathlen);
   2937      0    stevel 	mutex_exit(&base->v_lock);
   2938      0    stevel 
   2939      0    stevel 	if (doslash)
   2940      0    stevel 		rpath[rpathlen++] = '/';
   2941      0    stevel 	bcopy(path, rpath + rpathlen, plen);
   2942      0    stevel 	rpath[rpathlen + plen] = '\0';
   2943      0    stevel 
   2944      0    stevel 	mutex_enter(&vp->v_lock);
   2945      0    stevel 	if (vp->v_path != NULL) {
   2946      0    stevel 		mutex_exit(&vp->v_lock);
   2947      0    stevel 		kmem_free(rpath, rpathalloc);
   2948      0    stevel 	} else {
   2949      0    stevel 		vp->v_path = rpath;
   2950      0    stevel 		mutex_exit(&vp->v_lock);
   2951      0    stevel 	}
   2952      0    stevel }
   2953      0    stevel 
   2954      0    stevel /*
   2955      0    stevel  * Sets the path to the vnode to be the given string, regardless of current
   2956      0    stevel  * context.  The string must be a complete path from rootdir.  This is only used
   2957      0    stevel  * by fsop_root() for setting the path based on the mountpoint.
   2958      0    stevel  */
   2959      0    stevel void
   2960      0    stevel vn_setpath_str(struct vnode *vp, const char *str, size_t len)
   2961      0    stevel {
   2962      0    stevel 	char *buf = kmem_alloc(len + 1, KM_SLEEP);
   2963      0    stevel 
   2964      0    stevel 	mutex_enter(&vp->v_lock);
   2965      0    stevel 	if (vp->v_path != NULL) {
   2966      0    stevel 		mutex_exit(&vp->v_lock);
   2967      0    stevel 		kmem_free(buf, len + 1);
   2968      0    stevel 		return;
   2969      0    stevel 	}
   2970      0    stevel 
   2971      0    stevel 	vp->v_path = buf;
   2972      0    stevel 	bcopy(str, vp->v_path, len);
   2973      0    stevel 	vp->v_path[len] = '\0';
   2974      0    stevel 
   2975      0    stevel 	mutex_exit(&vp->v_lock);
   2976      0    stevel }
   2977      0    stevel 
   2978      0    stevel /*
   2979   6976  eschrock  * Called from within filesystem's vop_rename() to handle renames once the
   2980   6976  eschrock  * target vnode is available.
   2981   6976  eschrock  */
   2982   6976  eschrock void
   2983   6976  eschrock vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
   2984   6976  eschrock {
   2985   6976  eschrock 	char *tmp;
   2986   6976  eschrock 
   2987   6976  eschrock 	mutex_enter(&vp->v_lock);
   2988   6976  eschrock 	tmp = vp->v_path;
   2989   6976  eschrock 	vp->v_path = NULL;
   2990   6976  eschrock 	mutex_exit(&vp->v_lock);
   2991   6976  eschrock 	vn_setpath(rootdir, dvp, vp, nm, len);
   2992   6976  eschrock 	if (tmp != NULL)
   2993   6976  eschrock 		kmem_free(tmp, strlen(tmp) + 1);
   2994   6976  eschrock }
   2995   6976  eschrock 
   2996   6976  eschrock /*
   2997      0    stevel  * Similar to vn_setpath_str(), this function sets the path of the destination
   2998      0    stevel  * vnode to the be the same as the source vnode.
   2999      0    stevel  */
   3000      0    stevel void
   3001      0    stevel vn_copypath(struct vnode *src, struct vnode *dst)
   3002      0    stevel {
   3003      0    stevel 	char *buf;
   3004      0    stevel 	int alloc;
   3005      0    stevel 
   3006      0    stevel 	mutex_enter(&src->v_lock);
   3007      0    stevel 	if (src->v_path == NULL) {
   3008      0    stevel 		mutex_exit(&src->v_lock);
   3009      0    stevel 		return;
   3010      0    stevel 	}
   3011      0    stevel 	alloc = strlen(src->v_path) + 1;
   3012      0    stevel 
   3013      0    stevel 	/* avoid kmem_alloc() with lock held */
   3014      0    stevel 	mutex_exit(&src->v_lock);
   3015      0    stevel 	buf = kmem_alloc(alloc, KM_SLEEP);
   3016      0    stevel 	mutex_enter(&src->v_lock);
   3017      0    stevel 	if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
   3018      0    stevel 		mutex_exit(&src->v_lock);
   3019      0    stevel 		kmem_free(buf, alloc);
   3020      0    stevel 		return;
   3021      0    stevel 	}
   3022      0    stevel 	bcopy(src->v_path, buf, alloc);
   3023      0    stevel 	mutex_exit(&src->v_lock);
   3024      0    stevel 
   3025      0    stevel 	mutex_enter(&dst->v_lock);
   3026      0    stevel 	if (dst->v_path != NULL) {
   3027      0    stevel 		mutex_exit(&dst->v_lock);
   3028      0    stevel 		kmem_free(buf, alloc);
   3029      0    stevel 		return;
   3030      0    stevel 	}
   3031      0    stevel 	dst->v_path = buf;
   3032      0    stevel 	mutex_exit(&dst->v_lock);
   3033      0    stevel }
   3034      0    stevel 
   3035      0    stevel /*
   3036      0    stevel  * XXX Private interface for segvn routines that handle vnode
   3037      0    stevel  * large page segments.
   3038      0    stevel  *
   3039      0    stevel  * return 1 if vp's file system VOP_PAGEIO() implementation
   3040      0    stevel  * can be safely used instead of VOP_GETPAGE() for handling
   3041      0    stevel  * pagefaults against regular non swap files. VOP_PAGEIO()
   3042      0    stevel  * interface is considered safe here if its implementation
   3043      0    stevel  * is very close to VOP_GETPAGE() implementation.
   3044      0    stevel  * e.g. It zero's out the part of the page beyond EOF. Doesn't
   3045      0    stevel  * panic if there're file holes but instead returns an error.
   3046      0    stevel  * Doesn't assume file won't be changed by user writes, etc.
   3047      0    stevel  *
   3048      0    stevel  * return 0 otherwise.
   3049      0    stevel  *
   3050      0    stevel  * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
   3051      0    stevel  */
   3052      0    stevel int
   3053      0    stevel vn_vmpss_usepageio(vnode_t *vp)
   3054      0    stevel {
   3055      0    stevel 	vfs_t   *vfsp = vp->v_vfsp;
   3056      0    stevel 	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
   3057      0    stevel 	char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
   3058      0    stevel 	char **fsok = pageio_ok_fss;
   3059      0    stevel 
   3060      0    stevel 	if (fsname == NULL) {
   3061      0    stevel 		return (0);
   3062      0    stevel 	}
   3063      0    stevel 
   3064      0    stevel 	for (; *fsok; fsok++) {
   3065      0    stevel 		if (strcmp(*fsok, fsname) == 0) {
   3066      0    stevel 			return (1);
   3067      0    stevel 		}
   3068      0    stevel 	}
   3069      0    stevel 	return (0);
   3070      0    stevel }
   3071      0    stevel 
   3072      0    stevel /* VOP_XXX() macros call the corresponding fop_xxx() function */
   3073      0    stevel 
   3074      0    stevel int
   3075      0    stevel fop_open(
   3076      0    stevel 	vnode_t **vpp,
   3077      0    stevel 	int mode,
   3078   5331       amw 	cred_t *cr,
   3079   5331       amw 	caller_context_t *ct)
   3080      0    stevel {
   3081      0    stevel 	int ret;
   3082      0    stevel 	vnode_t *vp = *vpp;
   3083      0    stevel 
   3084      0    stevel 	VN_HOLD(vp);
   3085      0    stevel 	/*
   3086      0    stevel 	 * Adding to the vnode counts before calling open
   3087      0    stevel 	 * avoids the need for a mutex. It circumvents a race
   3088      0    stevel 	 * condition where a query made on the vnode counts results in a
   3089      0    stevel 	 * false negative. The inquirer goes away believing the file is
   3090      0    stevel 	 * not open when there is an open on the file already under way.
   3091      0    stevel 	 *
   3092      0    stevel 	 * The counts are meant to prevent NFS from granting a delegation
   3093      0    stevel 	 * when it would be dangerous to do so.
   3094      0    stevel 	 *
   3095      0    stevel 	 * The vnode counts are only kept on regular files
   3096      0    stevel 	 */
   3097      0    stevel 	if ((*vpp)->v_type == VREG) {
   3098      0    stevel 		if (mode & FREAD)
   3099      0    stevel 			atomic_add_32(&((*vpp)->v_rdcnt), 1);
   3100      0    stevel 		if (mode & FWRITE)
   3101      0    stevel 			atomic_add_32(&((*vpp)->v_wrcnt), 1);
   3102      0    stevel 	}
   3103      0    stevel 
   3104   4321    casper 	VOPXID_MAP_CR(vp, cr);
   3105   4321    casper 
   3106   5331       amw 	ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
   3107      0    stevel 
   3108      0    stevel 	if (ret) {
   3109      0    stevel 		/*
   3110      0    stevel 		 * Use the saved vp just in case the vnode ptr got trashed
   3111      0    stevel 		 * by the error.
   3112      0    stevel 		 */
   3113   1738       bmc 		VOPSTATS_UPDATE(vp, open);
   3114      0    stevel 		if ((vp->v_type == VREG) && (mode & FREAD))
   3115      0    stevel 			atomic_add_32(&(vp->v_rdcnt), -1);
   3116      0    stevel 		if ((vp->v_type == VREG) && (mode & FWRITE))
   3117      0    stevel 			atomic_add_32(&(vp->v_wrcnt), -1);
   3118      0    stevel 	} else {
   3119      0    stevel 		/*
   3120      0    stevel 		 * Some filesystems will return a different vnode,
   3121      0    stevel 		 * but the same path was still used to open it.
   3122      0    stevel 		 * So if we do change the vnode and need to
   3123      0    stevel 		 * copy over the path, do so here, rather than special
   3124      0    stevel 		 * casing each filesystem. Adjust the vnode counts to
   3125      0    stevel 		 * reflect the vnode switch.
   3126      0    stevel 		 */
   3127   1738       bmc 		VOPSTATS_UPDATE(*vpp, open);
   3128      0    stevel 		if (*vpp != vp && *vpp != NULL) {
   3129    254  eschrock 			vn_copypath(vp, *vpp);
   3130    254  eschrock 			if (((*vpp)->v_type == VREG) && (mode & FREAD))
   3131    254  eschrock 				atomic_add_32(&((*vpp)->v_rdcnt), 1);
   3132    254  eschrock 			if ((vp->v_type == VREG) && (mode & FREAD))
   3133    254  eschrock 				atomic_add_32(&(vp->v_rdcnt), -1);
   3134    254  eschrock 			if (((*vpp)->v_type == VREG) && (mode & FWRITE))
   3135    254  eschrock 				atomic_add_32(&((*vpp)->v_wrcnt), 1);
   3136    254  eschrock 			if ((vp->v_type == VREG) && (mode & FWRITE))
   3137    254  eschrock 				atomic_add_32(&(vp->v_wrcnt), -1);
   3138      0    stevel 		}
   3139      0    stevel 	}
   3140      0    stevel 	VN_RELE(vp);
   3141      0    stevel 	return (ret);
   3142      0    stevel }
   3143      0    stevel 
   3144      0    stevel int
   3145      0    <