Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22   8636      Mark  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25   4144     peteh 
     26   4144     peteh /* Portions Copyright 2007 Jeremy Teo */
     27    789    ahrens 
     28   3444  ek110237 #ifdef _KERNEL
     29    789    ahrens #include <sys/types.h>
     30    789    ahrens #include <sys/param.h>
     31    789    ahrens #include <sys/time.h>
     32    789    ahrens #include <sys/systm.h>
     33    789    ahrens #include <sys/sysmacros.h>
     34    789    ahrens #include <sys/resource.h>
     35    789    ahrens #include <sys/mntent.h>
     36   1816     marks #include <sys/mkdev.h>
     37   5498      timh #include <sys/u8_textprep.h>
     38   6492      timh #include <sys/dsl_dataset.h>
     39    789    ahrens #include <sys/vfs.h>
     40   3898       rsb #include <sys/vfs_opreg.h>
     41    789    ahrens #include <sys/vnode.h>
     42    789    ahrens #include <sys/file.h>
     43    789    ahrens #include <sys/kmem.h>
     44    789    ahrens #include <sys/errno.h>
     45    789    ahrens #include <sys/unistd.h>
     46    789    ahrens #include <sys/mode.h>
     47    789    ahrens #include <sys/atomic.h>
     48    789    ahrens #include <vm/pvn.h>
     49    789    ahrens #include "fs/fs_subr.h"
     50    789    ahrens #include <sys/zfs_dir.h>
     51    789    ahrens #include <sys/zfs_acl.h>
     52    789    ahrens #include <sys/zfs_ioctl.h>
     53   3444  ek110237 #include <sys/zfs_rlock.h>
     54   5331       amw #include <sys/zfs_fuid.h>
     55   3444  ek110237 #include <sys/fs/zfs.h>
     56   5331       amw #include <sys/kidmap.h>
     57   3444  ek110237 #endif /* _KERNEL */
     58   3444  ek110237 
     59   3444  ek110237 #include <sys/dmu.h>
     60   3444  ek110237 #include <sys/refcount.h>
     61   3444  ek110237 #include <sys/stat.h>
     62   3444  ek110237 #include <sys/zap.h>
     63    789    ahrens #include <sys/zfs_znode.h>
     64   5498      timh 
     65   5498      timh #include "zfs_prop.h"
     66    789    ahrens 
     67   3444  ek110237 /*
     68   6712     tomee  * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
     69   6712     tomee  * turned on when DEBUG is also defined.
     70   6712     tomee  */
     71   6712     tomee #ifdef	DEBUG
     72   6712     tomee #define	ZNODE_STATS
     73   6712     tomee #endif	/* DEBUG */
     74   6712     tomee 
     75   6712     tomee #ifdef	ZNODE_STATS
     76   6712     tomee #define	ZNODE_STAT_ADD(stat)			((stat)++)
     77   6712     tomee #else
     78   6712     tomee #define	ZNODE_STAT_ADD(stat)			/* nothing */
     79   6712     tomee #endif	/* ZNODE_STATS */
     80   6712     tomee 
     81   6712     tomee #define	POINTER_IS_VALID(p)	(!((uintptr_t)(p) & 0x3))
     82   6712     tomee #define	POINTER_INVALIDATE(pp)	(*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1))
     83   6712     tomee 
/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
     89   3444  ek110237 #ifdef _KERNEL
     90   9788       Tom /*
     91   9788       Tom  * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
     92   9788       Tom  * be freed before it can be safely accessed.
     93   9788       Tom  */
     94   9788       Tom krwlock_t zfsvfs_lock;
     95   9788       Tom 
     96   6712     tomee static kmem_cache_t *znode_cache = NULL;
     97    789    ahrens 
     98    789    ahrens /*ARGSUSED*/
     99    789    ahrens static void
    100   5642    maybee znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
    101    789    ahrens {
    102   5642    maybee 	/*
    103   5642    maybee 	 * We should never drop all dbuf refs without first clearing
    104   5642    maybee 	 * the eviction callback.
    105   5642    maybee 	 */
    106   5642    maybee 	panic("evicting znode %p\n", user_ptr);
    107    789    ahrens }
    108    789    ahrens 
    109    789    ahrens /*ARGSUSED*/
    110    789    ahrens static int
    111   6712     tomee zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
    112    789    ahrens {
    113    789    ahrens 	znode_t *zp = buf;
    114    789    ahrens 
    115   6712     tomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
    116   6712     tomee 
    117   6712     tomee 	zp->z_vnode = vn_alloc(kmflags);
    118   6712     tomee 	if (zp->z_vnode == NULL) {
    119   6712     tomee 		return (-1);
    120   6712     tomee 	}
    121   6712     tomee 	ZTOV(zp)->v_data = zp;
    122   6712     tomee 
    123   6712     tomee 	list_link_init(&zp->z_link_node);
    124   6712     tomee 
    125    789    ahrens 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
    126   1669    perrin 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
    127   3897    maybee 	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
    128    789    ahrens 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
    129   1669    perrin 
    130   1669    perrin 	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
    131   1669    perrin 	avl_create(&zp->z_range_avl, zfs_range_compare,
    132   1669    perrin 	    sizeof (rl_t), offsetof(rl_t, r_node));
    133   1669    perrin 
    134   5446    ahrens 	zp->z_dbuf = NULL;
    135   6712     tomee 	zp->z_dirlocks = NULL;
    136   9981       Tim 	zp->z_acl_cached = NULL;
    137    789    ahrens 	return (0);
    138    789    ahrens }
    139    789    ahrens 
    140    789    ahrens /*ARGSUSED*/
    141    789    ahrens static void
    142   6712     tomee zfs_znode_cache_destructor(void *buf, void *arg)
    143    789    ahrens {
    144    789    ahrens 	znode_t *zp = buf;
    145    789    ahrens 
    146   6712     tomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
    147   6712     tomee 	ASSERT(ZTOV(zp)->v_data == zp);
    148   6712     tomee 	vn_free(ZTOV(zp));
    149   6712     tomee 	ASSERT(!list_link_active(&zp->z_link_node));
    150    789    ahrens 	mutex_destroy(&zp->z_lock);
    151   1669    perrin 	rw_destroy(&zp->z_parent_lock);
    152   3897    maybee 	rw_destroy(&zp->z_name_lock);
    153    789    ahrens 	mutex_destroy(&zp->z_acl_lock);
    154   1669    perrin 	avl_destroy(&zp->z_range_avl);
    155   4831   gw25295 	mutex_destroy(&zp->z_range_lock);
    156    789    ahrens 
    157   5446    ahrens 	ASSERT(zp->z_dbuf == NULL);
    158   6712     tomee 	ASSERT(zp->z_dirlocks == NULL);
    159  10143       Tim 	ASSERT(zp->z_acl_cached == NULL);
    160   6712     tomee }
    161   6712     tomee 
    162   6712     tomee #ifdef	ZNODE_STATS
    163   6712     tomee static struct {
    164   6712     tomee 	uint64_t zms_zfsvfs_invalid;
    165   9788       Tom 	uint64_t zms_zfsvfs_recheck1;
    166   6712     tomee 	uint64_t zms_zfsvfs_unmounted;
    167   9788       Tom 	uint64_t zms_zfsvfs_recheck2;
    168   7579       Tom 	uint64_t zms_obj_held;
    169   6712     tomee 	uint64_t zms_vnode_locked;
    170   7579       Tom 	uint64_t zms_not_only_dnlc;
    171   6712     tomee } znode_move_stats;
    172   6712     tomee #endif	/* ZNODE_STATS */
    173   6712     tomee 
    174   6712     tomee static void
    175   6712     tomee zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
    176   6712     tomee {
    177   6712     tomee 	vnode_t *vp;
    178   6712     tomee 
    179   6712     tomee 	/* Copy fields. */
    180   6712     tomee 	nzp->z_zfsvfs = ozp->z_zfsvfs;
    181   6712     tomee 
    182   6712     tomee 	/* Swap vnodes. */
    183   6712     tomee 	vp = nzp->z_vnode;
    184   6712     tomee 	nzp->z_vnode = ozp->z_vnode;
    185   6712     tomee 	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
    186   6712     tomee 	ZTOV(ozp)->v_data = ozp;
    187   6712     tomee 	ZTOV(nzp)->v_data = nzp;
    188   6712     tomee 
    189   6712     tomee 	nzp->z_id = ozp->z_id;
    190   6712     tomee 	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
    191   6712     tomee 	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
    192   6712     tomee 	nzp->z_unlinked = ozp->z_unlinked;
    193   6712     tomee 	nzp->z_atime_dirty = ozp->z_atime_dirty;
    194   6712     tomee 	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
    195   6712     tomee 	nzp->z_blksz = ozp->z_blksz;
    196   6712     tomee 	nzp->z_seq = ozp->z_seq;
    197   6712     tomee 	nzp->z_mapcnt = ozp->z_mapcnt;
    198   6712     tomee 	nzp->z_last_itx = ozp->z_last_itx;
    199   6712     tomee 	nzp->z_gen = ozp->z_gen;
    200   6712     tomee 	nzp->z_sync_cnt = ozp->z_sync_cnt;
    201   6712     tomee 	nzp->z_phys = ozp->z_phys;
    202   6712     tomee 	nzp->z_dbuf = ozp->z_dbuf;
    203  10250      Mark 
    204  10250      Mark 	/*
    205  10269      Mark 	 * Since this is just an idle znode and kmem is already dealing with
    206  10269      Mark 	 * memory pressure, release any cached ACL.
    207  10250      Mark 	 */
    208  10250      Mark 	if (ozp->z_acl_cached) {
    209  10250      Mark 		zfs_acl_free(ozp->z_acl_cached);
    210  10250      Mark 		ozp->z_acl_cached = NULL;
    211  10250      Mark 	}
    212   6712     tomee 
    213   6712     tomee 	/* Update back pointers. */
    214   6712     tomee 	(void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
    215   6712     tomee 	    znode_evict_error);
    216   6712     tomee 
    217   6712     tomee 	/*
    218   6712     tomee 	 * Invalidate the original znode by clearing fields that provide a
    219   6712     tomee 	 * pointer back to the znode. Set the low bit of the vfs pointer to
    220   6712     tomee 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
    221   6712     tomee 	 * subsequent callback.
    222   6712     tomee 	 */
    223   6712     tomee 	ozp->z_dbuf = NULL;
    224   6712     tomee 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
    225   6712     tomee }
    226   6712     tomee 
    227   6712     tomee /*ARGSUSED*/
    228   6712     tomee static kmem_cbrc_t
    229   6712     tomee zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
    230   6712     tomee {
    231   6712     tomee 	znode_t *ozp = buf, *nzp = newbuf;
    232   6712     tomee 	zfsvfs_t *zfsvfs;
    233   6712     tomee 	vnode_t *vp;
    234   6712     tomee 
    235   6712     tomee 	/*
    236   6712     tomee 	 * The znode is on the file system's list of known znodes if the vfs
    237   6712     tomee 	 * pointer is valid. We set the low bit of the vfs pointer when freeing
    238   6712     tomee 	 * the znode to invalidate it, and the memory patterns written by kmem
    239   6712     tomee 	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
    240   6712     tomee 	 * created znode sets the vfs pointer last of all to indicate that the
    241   6712     tomee 	 * znode is known and in a valid state to be moved by this function.
    242   6712     tomee 	 */
    243   6712     tomee 	zfsvfs = ozp->z_zfsvfs;
    244   6712     tomee 	if (!POINTER_IS_VALID(zfsvfs)) {
    245   6712     tomee 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
    246   6712     tomee 		return (KMEM_CBRC_DONT_KNOW);
    247   6712     tomee 	}
    248   6712     tomee 
    249   6712     tomee 	/*
    250   9788       Tom 	 * Close a small window in which it's possible that the filesystem could
    251   9788       Tom 	 * be unmounted and freed, and zfsvfs, though valid in the previous
    252   9788       Tom 	 * statement, could point to unrelated memory by the time we try to
    253   9788       Tom 	 * prevent the filesystem from being unmounted.
    254   9788       Tom 	 */
    255   9788       Tom 	rw_enter(&zfsvfs_lock, RW_WRITER);
    256   9788       Tom 	if (zfsvfs != ozp->z_zfsvfs) {
    257   9788       Tom 		rw_exit(&zfsvfs_lock);
    258   9788       Tom 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
    259   9788       Tom 		return (KMEM_CBRC_DONT_KNOW);
    260   9788       Tom 	}
    261   9788       Tom 
    262   9788       Tom 	/*
    263   9788       Tom 	 * If the znode is still valid, then so is the file system. We know that
    264   9788       Tom 	 * no valid file system can be freed while we hold zfsvfs_lock, so we
    265   9788       Tom 	 * can safely ensure that the filesystem is not and will not be
    266   9788       Tom 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
    267   6712     tomee 	 */
    268   9396   Matthew 	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
    269   9396   Matthew 	if (zfsvfs->z_unmounted) {
    270   9396   Matthew 		ZFS_EXIT(zfsvfs);
    271   9788       Tom 		rw_exit(&zfsvfs_lock);
    272   6712     tomee 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
    273   6712     tomee 		return (KMEM_CBRC_DONT_KNOW);
    274   6712     tomee 	}
    275   9788       Tom 	rw_exit(&zfsvfs_lock);
    276   6712     tomee 
    277   6712     tomee 	mutex_enter(&zfsvfs->z_znodes_lock);
    278   6712     tomee 	/*
    279   6712     tomee 	 * Recheck the vfs pointer in case the znode was removed just before
    280   6712     tomee 	 * acquiring the lock.
    281   6712     tomee 	 */
    282   6712     tomee 	if (zfsvfs != ozp->z_zfsvfs) {
    283   6712     tomee 		mutex_exit(&zfsvfs->z_znodes_lock);
    284   6712     tomee 		ZFS_EXIT(zfsvfs);
    285   9788       Tom 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
    286   6712     tomee 		return (KMEM_CBRC_DONT_KNOW);
    287   6712     tomee 	}
    288   6712     tomee 
    289   6712     tomee 	/*
    290   6712     tomee 	 * At this point we know that as long as we hold z_znodes_lock, the
    291   6712     tomee 	 * znode cannot be freed and fields within the znode can be safely
    292   7579       Tom 	 * accessed. Now, prevent a race with zfs_zget().
    293   6712     tomee 	 */
    294   7579       Tom 	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
    295   7579       Tom 		mutex_exit(&zfsvfs->z_znodes_lock);
    296   7579       Tom 		ZFS_EXIT(zfsvfs);
    297   7579       Tom 		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
    298   7579       Tom 		return (KMEM_CBRC_LATER);
    299   7579       Tom 	}
    300   7579       Tom 
    301   6712     tomee 	vp = ZTOV(ozp);
    302   6712     tomee 	if (mutex_tryenter(&vp->v_lock) == 0) {
    303   7579       Tom 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
    304   6712     tomee 		mutex_exit(&zfsvfs->z_znodes_lock);
    305   6712     tomee 		ZFS_EXIT(zfsvfs);
    306   6712     tomee 		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
    307   6712     tomee 		return (KMEM_CBRC_LATER);
    308   6712     tomee 	}
    309   7579       Tom 
    310   6712     tomee 	/* Only move znodes that are referenced _only_ by the DNLC. */
    311   6712     tomee 	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
    312   6712     tomee 		mutex_exit(&vp->v_lock);
    313   7579       Tom 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
    314   6712     tomee 		mutex_exit(&zfsvfs->z_znodes_lock);
    315   6712     tomee 		ZFS_EXIT(zfsvfs);
    316   7579       Tom 		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
    317   6712     tomee 		return (KMEM_CBRC_LATER);
    318   6712     tomee 	}
    319   6712     tomee 
    320   6712     tomee 	/*
    321   6712     tomee 	 * The znode is known and in a valid state to move. We're holding the
    322   6712     tomee 	 * locks needed to execute the critical section.
    323   6712     tomee 	 */
    324   6712     tomee 	zfs_znode_move_impl(ozp, nzp);
    325   6712     tomee 	mutex_exit(&vp->v_lock);
    326   7579       Tom 	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
    327   6712     tomee 
    328   6712     tomee 	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
    329   6712     tomee 	mutex_exit(&zfsvfs->z_znodes_lock);
    330   6712     tomee 	ZFS_EXIT(zfsvfs);
    331   6712     tomee 
    332   6712     tomee 	return (KMEM_CBRC_YES);
    333    789    ahrens }
    334    789    ahrens 
    335    789    ahrens void
    336    789    ahrens zfs_znode_init(void)
    337    789    ahrens {
    338    789    ahrens 	/*
    339    789    ahrens 	 * Initialize zcache
    340    789    ahrens 	 */
    341   9788       Tom 	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
    342    789    ahrens 	ASSERT(znode_cache == NULL);
    343    789    ahrens 	znode_cache = kmem_cache_create("zfs_znode_cache",
    344    789    ahrens 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
    345    789    ahrens 	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
    346   6712     tomee 	kmem_cache_set_move(znode_cache, zfs_znode_move);
    347    789    ahrens }
    348    789    ahrens 
    349    789    ahrens void
    350    789    ahrens zfs_znode_fini(void)
    351    789    ahrens {
    352    789    ahrens 	/*
    353    789    ahrens 	 * Cleanup vfs & vnode ops
    354    789    ahrens 	 */
    355    789    ahrens 	zfs_remove_op_tables();
    356    789    ahrens 
    357    789    ahrens 	/*
    358    789    ahrens 	 * Cleanup zcache
    359    789    ahrens 	 */
    360    789    ahrens 	if (znode_cache)
    361    789    ahrens 		kmem_cache_destroy(znode_cache);
    362    789    ahrens 	znode_cache = NULL;
    363   9788       Tom 	rw_destroy(&zfsvfs_lock);
    364    789    ahrens }
    365    789    ahrens 
    366    789    ahrens struct vnodeops *zfs_dvnodeops;
    367    789    ahrens struct vnodeops *zfs_fvnodeops;
    368    789    ahrens struct vnodeops *zfs_symvnodeops;
    369    789    ahrens struct vnodeops *zfs_xdvnodeops;
    370    789    ahrens struct vnodeops *zfs_evnodeops;
    371   8845       amw struct vnodeops *zfs_sharevnodeops;
    372    789    ahrens 
    373    789    ahrens void
    374    789    ahrens zfs_remove_op_tables()
    375    789    ahrens {
    376    789    ahrens 	/*
    377    789    ahrens 	 * Remove vfs ops
    378    789    ahrens 	 */
    379    789    ahrens 	ASSERT(zfsfstype);
    380    789    ahrens 	(void) vfs_freevfsops_by_type(zfsfstype);
    381    789    ahrens 	zfsfstype = 0;
    382    789    ahrens 
    383    789    ahrens 	/*
    384    789    ahrens 	 * Remove vnode ops
    385    789    ahrens 	 */
    386    789    ahrens 	if (zfs_dvnodeops)
    387    789    ahrens 		vn_freevnodeops(zfs_dvnodeops);
    388    789    ahrens 	if (zfs_fvnodeops)
    389    789    ahrens 		vn_freevnodeops(zfs_fvnodeops);
    390    789    ahrens 	if (zfs_symvnodeops)
    391    789    ahrens 		vn_freevnodeops(zfs_symvnodeops);
    392    789    ahrens 	if (zfs_xdvnodeops)
    393    789    ahrens 		vn_freevnodeops(zfs_xdvnodeops);
    394    789    ahrens 	if (zfs_evnodeops)
    395    789    ahrens 		vn_freevnodeops(zfs_evnodeops);
    396   8845       amw 	if (zfs_sharevnodeops)
    397   8845       amw 		vn_freevnodeops(zfs_sharevnodeops);
    398    789    ahrens 
    399    789    ahrens 	zfs_dvnodeops = NULL;
    400    789    ahrens 	zfs_fvnodeops = NULL;
    401    789    ahrens 	zfs_symvnodeops = NULL;
    402    789    ahrens 	zfs_xdvnodeops = NULL;
    403    789    ahrens 	zfs_evnodeops = NULL;
    404   8845       amw 	zfs_sharevnodeops = NULL;
    405    789    ahrens }
    406    789    ahrens 
    407    789    ahrens extern const fs_operation_def_t zfs_dvnodeops_template[];
    408    789    ahrens extern const fs_operation_def_t zfs_fvnodeops_template[];
    409    789    ahrens extern const fs_operation_def_t zfs_xdvnodeops_template[];
    410    789    ahrens extern const fs_operation_def_t zfs_symvnodeops_template[];
    411    789    ahrens extern const fs_operation_def_t zfs_evnodeops_template[];
    412   8845       amw extern const fs_operation_def_t zfs_sharevnodeops_template[];
    413    789    ahrens 
    414    789    ahrens int
    415    789    ahrens zfs_create_op_tables()
    416    789    ahrens {
    417    789    ahrens 	int error;
    418    789    ahrens 
    419    789    ahrens 	/*
    420    789    ahrens 	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
    421    789    ahrens 	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
    422    789    ahrens 	 * In this case we just return as the ops vectors are already set up.
    423    789    ahrens 	 */
    424    789    ahrens 	if (zfs_dvnodeops)
    425    789    ahrens 		return (0);
    426    789    ahrens 
    427    789    ahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
    428    789    ahrens 	    &zfs_dvnodeops);
    429    789    ahrens 	if (error)
    430    789    ahrens 		return (error);
    431    789    ahrens 
    432    789    ahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
    433    789    ahrens 	    &zfs_fvnodeops);
    434    789    ahrens 	if (error)
    435    789    ahrens 		return (error);
    436    789    ahrens 
    437    789    ahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
    438    789    ahrens 	    &zfs_symvnodeops);
    439    789    ahrens 	if (error)
    440    789    ahrens 		return (error);
    441    789    ahrens 
    442    789    ahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
    443    789    ahrens 	    &zfs_xdvnodeops);
    444    789    ahrens 	if (error)
    445    789    ahrens 		return (error);
    446    789    ahrens 
    447    789    ahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
    448    789    ahrens 	    &zfs_evnodeops);
    449   8845       amw 	if (error)
    450   8845       amw 		return (error);
    451   8845       amw 
    452   8845       amw 	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
    453   8845       amw 	    &zfs_sharevnodeops);
    454   8845       amw 
    455   8845       amw 	return (error);
    456   8845       amw }
    457   8845       amw 
    458   9030      Mark int
    459   8845       amw zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
    460   8845       amw {
    461   9179      Mark 	zfs_acl_ids_t acl_ids;
    462   8845       amw 	vattr_t vattr;
    463   8845       amw 	znode_t *sharezp;
    464   8845       amw 	vnode_t *vp;
    465   8845       amw 	znode_t *zp;
    466   8845       amw 	int error;
    467   8845       amw 
    468   8845       amw 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
    469   8845       amw 	vattr.va_type = VDIR;
    470   8845       amw 	vattr.va_mode = S_IFDIR|0555;
    471   8845       amw 	vattr.va_uid = crgetuid(kcred);
    472   8845       amw 	vattr.va_gid = crgetgid(kcred);
    473   8845       amw 
    474   8845       amw 	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
    475   8845       amw 	sharezp->z_unlinked = 0;
    476   8845       amw 	sharezp->z_atime_dirty = 0;
    477   8845       amw 	sharezp->z_zfsvfs = zfsvfs;
    478   8845       amw 
    479   8845       amw 	vp = ZTOV(sharezp);
    480   8845       amw 	vn_reinit(vp);
    481   8845       amw 	vp->v_type = VDIR;
    482   8845       amw 
    483   9179      Mark 	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
    484   9179      Mark 	    kcred, NULL, &acl_ids));
    485   8845       amw 	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE,
    486   9179      Mark 	    &zp, 0, &acl_ids);
    487   8845       amw 	ASSERT3P(zp, ==, sharezp);
    488   8845       amw 	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
    489   8845       amw 	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
    490   8845       amw 	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
    491   8845       amw 	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
    492   8845       amw 	zfsvfs->z_shares_dir = sharezp->z_id;
    493   8845       amw 
    494   9179      Mark 	zfs_acl_ids_free(&acl_ids);
    495   8845       amw 	ZTOV(sharezp)->v_count = 0;
    496   8845       amw 	dmu_buf_rele(sharezp->z_dbuf, NULL);
    497   8845       amw 	sharezp->z_dbuf = NULL;
    498   8845       amw 	kmem_cache_free(znode_cache, sharezp);
    499   5331       amw 
    500   8845       amw 	return (error);
    501    789    ahrens }
    502    789    ahrens 
    503    789    ahrens /*
    504   1816     marks  * define a couple of values we need available
    505   1816     marks  * for both 64 and 32 bit environments.
    506   1816     marks  */
    507   1816     marks #ifndef NBITSMINOR64
    508   1816     marks #define	NBITSMINOR64	32
    509   1816     marks #endif
    510   1816     marks #ifndef MAXMAJ64
    511   1816     marks #define	MAXMAJ64	0xffffffffUL
    512   1816     marks #endif
    513   1816     marks #ifndef	MAXMIN64
    514   1816     marks #define	MAXMIN64	0xffffffffUL
    515   1816     marks #endif
    516   1816     marks 
    517   1816     marks /*
    518   1816     marks  * Create special expldev for ZFS private use.
    519   1816     marks  * Can't use standard expldev since it doesn't do
    520   1816     marks  * what we want.  The standard expldev() takes a
    521   1816     marks  * dev32_t in LP64 and expands it to a long dev_t.
    522   1816     marks  * We need an interface that takes a dev32_t in ILP32
    523   1816     marks  * and expands it to a long dev_t.
    524   1816     marks  */
    525   1816     marks static uint64_t
    526   1816     marks zfs_expldev(dev_t dev)
    527   1816     marks {
    528   1816     marks #ifndef _LP64
    529   1816     marks 	major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
    530   1816     marks 	return (((uint64_t)major << NBITSMINOR64) |
    531   1816     marks 	    ((minor_t)dev & MAXMIN32));
    532   1816     marks #else
    533   1816     marks 	return (dev);
    534   1816     marks #endif
    535   1816     marks }
    536   1816     marks 
    537   1816     marks /*
    538   1816     marks  * Special cmpldev for ZFS private use.
    539   1816     marks  * Can't use standard cmpldev since it takes
    540   1816     marks  * a long dev_t and compresses it to dev32_t in
    541   1816     marks  * LP64.  We need to do a compaction of a long dev_t
    542   1816     marks  * to a dev32_t in ILP32.
    543   1816     marks  */
    544   1816     marks dev_t
    545   1816     marks zfs_cmpldev(uint64_t dev)
    546   1816     marks {
    547   1816     marks #ifndef _LP64
    548   1816     marks 	minor_t minor = (minor_t)dev & MAXMIN64;
    549   1816     marks 	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
    550   1816     marks 
    551   1816     marks 	if (major > MAXMAJ32 || minor > MAXMIN32)
    552   1816     marks 		return (NODEV32);
    553   1816     marks 
    554   1816     marks 	return (((dev32_t)major << NBITSMINOR32) | minor);
    555   1816     marks #else
    556   1816     marks 	return (dev);
    557   1816     marks #endif
    558   1816     marks }
    559   1816     marks 
    560   5446    ahrens static void
    561   6712     tomee zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db)
    562   5446    ahrens {
    563   5446    ahrens 	znode_t		*nzp;
    564   5446    ahrens 
    565   6712     tomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
    566   6712     tomee 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
    567   5446    ahrens 
    568   5446    ahrens 	mutex_enter(&zp->z_lock);
    569   5446    ahrens 
    570   5446    ahrens 	ASSERT(zp->z_dbuf == NULL);
    571  10269      Mark 	ASSERT(zp->z_acl_cached == NULL);
    572   5446    ahrens 	zp->z_dbuf = db;
    573   5642    maybee 	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error);
    574   5446    ahrens 
    575   5446    ahrens 	/*
    576   5446    ahrens 	 * there should be no
    577   5446    ahrens 	 * concurrent zgets on this object.
    578   5446    ahrens 	 */
    579   5446    ahrens 	if (nzp != NULL)
    580   7240   rh87107 		panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db);
    581   5446    ahrens 
    582   5446    ahrens 	/*
    583   5446    ahrens 	 * Slap on VROOT if we are the root znode
    584   5446    ahrens 	 */
    585   5446    ahrens 	if (zp->z_id == zfsvfs->z_root)
    586   5446    ahrens 		ZTOV(zp)->v_flag |= VROOT;
    587   5446    ahrens 
    588   5446    ahrens 	mutex_exit(&zp->z_lock);
    589   5446    ahrens 	vn_exists(ZTOV(zp));
    590   5446    ahrens }
    591   5446    ahrens 
    592   5642    maybee void
    593   5446    ahrens zfs_znode_dmu_fini(znode_t *zp)
    594   5446    ahrens {
    595   5446    ahrens 	dmu_buf_t *db = zp->z_dbuf;
    596   6712     tomee 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
    597   6712     tomee 	    zp->z_unlinked ||
    598   5642    maybee 	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
    599   5446    ahrens 	ASSERT(zp->z_dbuf != NULL);
    600   5446    ahrens 	zp->z_dbuf = NULL;
    601   5642    maybee 	VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL));
    602   5446    ahrens 	dmu_buf_rele(db, NULL);
    603   5446    ahrens }
    604   5446    ahrens 
/*
 * Construct a new znode/vnode and initialize it.
 *
 * This does not call dmu_set_user(); that is up to
 * the caller to do, in case you don't want to
 * return the znode.
 */
    612   1544  eschrock static znode_t *
    613   5446    ahrens zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
    614    789    ahrens {
    615    789    ahrens 	znode_t	*zp;
    616    789    ahrens 	vnode_t *vp;
    617    789    ahrens 
    618    789    ahrens 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
    619    789    ahrens 
    620    789    ahrens 	ASSERT(zp->z_dirlocks == NULL);
    621   5446    ahrens 	ASSERT(zp->z_dbuf == NULL);
    622   6712     tomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
    623    789    ahrens 
    624   6712     tomee 	/*
    625   6712     tomee 	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
    626   6712     tomee 	 * the zfs_znode_move() callback.
    627   6712     tomee 	 */
    628   5446    ahrens 	zp->z_phys = NULL;
    629   3461    ahrens 	zp->z_unlinked = 0;
    630    789    ahrens 	zp->z_atime_dirty = 0;
    631    789    ahrens 	zp->z_mapcnt = 0;
    632    789    ahrens 	zp->z_last_itx = 0;
    633   5446    ahrens 	zp->z_id = db->db_object;
    634    789    ahrens 	zp->z_blksz = blksz;
    635    789    ahrens 	zp->z_seq = 0x7A4653;
    636   3063    perrin 	zp->z_sync_cnt = 0;
    637   5446    ahrens 
    638   5446    ahrens 	vp = ZTOV(zp);
    639   5446    ahrens 	vn_reinit(vp);
    640   5446    ahrens 
    641   6712     tomee 	zfs_znode_dmu_init(zfsvfs, zp, db);
    642   5446    ahrens 
    643   5326  ek110237 	zp->z_gen = zp->z_phys->zp_gen;
    644    789    ahrens 
    645    789    ahrens 	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
    646    789    ahrens 	vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
    647    789    ahrens 
    648    789    ahrens 	switch (vp->v_type) {
    649    789    ahrens 	case VDIR:
    650    789    ahrens 		if (zp->z_phys->zp_flags & ZFS_XATTR) {
    651    789    ahrens 			vn_setops(vp, zfs_xdvnodeops);
    652    789    ahrens 			vp->v_flag |= V_XATTRDIR;
    653   5446    ahrens 		} else {
    654    789    ahrens 			vn_setops(vp, zfs_dvnodeops);
    655   5446    ahrens 		}
    656    869    perrin 		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
    657    789    ahrens 		break;
    658    789    ahrens 	case VBLK:
    659    789    ahrens 	case VCHR:
    660   1816     marks 		vp->v_rdev = zfs_cmpldev(zp->z_phys->zp_rdev);
    661    789    ahrens 		/*FALLTHROUGH*/
    662    789    ahrens 	case VFIFO:
    663    789    ahrens 	case VSOCK:
    664    789    ahrens 	case VDOOR:
    665    789    ahrens 		vn_setops(vp, zfs_fvnodeops);
    666    789    ahrens 		break;
    667    789    ahrens 	case VREG:
    668    789    ahrens 		vp->v_flag |= VMODSORT;
    669   8845       amw 		if (zp->z_phys->zp_parent == zfsvfs->z_shares_dir)
    670   8845       amw 			vn_setops(vp, zfs_sharevnodeops);
    671   8845       amw 		else
    672   8845       amw 			vn_setops(vp, zfs_fvnodeops);
    673    789    ahrens 		break;
    674    789    ahrens 	case VLNK:
    675    789    ahrens 		vn_setops(vp, zfs_symvnodeops);
    676    789    ahrens 		break;
    677    789    ahrens 	default:
    678    789    ahrens 		vn_setops(vp, zfs_evnodeops);
    679    789    ahrens 		break;
    680    789    ahrens 	}
    681   6712     tomee 
    682   6712     tomee 	mutex_enter(&zfsvfs->z_znodes_lock);
    683   6712     tomee 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
    684   6712     tomee 	membar_producer();
    685   6712     tomee 	/*
    686   6712     tomee 	 * Everything else must be valid before assigning z_zfsvfs makes the
    687   6712     tomee 	 * znode eligible for zfs_znode_move().
    688   6712     tomee 	 */
    689   6712     tomee 	zp->z_zfsvfs = zfsvfs;
    690   6712     tomee 	mutex_exit(&zfsvfs->z_znodes_lock);
    691    789    ahrens 
    692   5642    maybee 	VFS_HOLD(zfsvfs->z_vfs);
    693    789    ahrens 	return (zp);
    694    789    ahrens }
    695    789    ahrens 
    696    789    ahrens /*
    697    789    ahrens  * Create a new DMU object to hold a zfs znode.
    698    789    ahrens  *
    699    789    ahrens  *	IN:	dzp	- parent directory for new znode
    700    789    ahrens  *		vap	- file attributes for new znode
    701    789    ahrens  *		tx	- dmu transaction id for zap operations
    702    789    ahrens  *		cr	- credentials of caller
    703    789    ahrens  *		flag	- flags:
    704    789    ahrens  *			  IS_ROOT_NODE	- new object will be root
    705    789    ahrens  *			  IS_XATTR	- new object is an attribute
    706    789    ahrens  *			  IS_REPLAY	- intent log replay
    707   5331       amw  *		bonuslen - length of bonus buffer
    708   5331       amw  *		setaclp  - File/Dir initial ACL
    709   5331       amw  *		fuidp	 - Tracks fuid allocation.
    710    789    ahrens  *
    711   5446    ahrens  *	OUT:	zpp	- allocated znode
    712    789    ahrens  *
    713    789    ahrens  */
    714    789    ahrens void
    715   5446    ahrens zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    716   9179      Mark     uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids)
    717    789    ahrens {
    718   5446    ahrens 	dmu_buf_t	*db;
    719    789    ahrens 	znode_phys_t	*pzp;
    720    789    ahrens 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
    721    789    ahrens 	timestruc_t	now;
    722   5446    ahrens 	uint64_t	gen, obj;
    723    789    ahrens 	int		err;
    724    789    ahrens 
    725    789    ahrens 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
    726    789    ahrens 
    727   8227      Neil 	if (zfsvfs->z_replay) {
    728   5446    ahrens 		obj = vap->va_nodeid;
    729    789    ahrens 		flag |= IS_REPLAY;
    730    789    ahrens 		now = vap->va_ctime;		/* see zfs_replay_create() */
    731    789    ahrens 		gen = vap->va_nblocks;		/* ditto */
    732    789    ahrens 	} else {
    733   5446    ahrens 		obj = 0;
    734    789    ahrens 		gethrestime(&now);
    735    789    ahrens 		gen = dmu_tx_get_txg(tx);
    736    789    ahrens 	}
    737    789    ahrens 
    738    789    ahrens 	/*
    739    789    ahrens 	 * Create a new DMU object.
    740    789    ahrens 	 */
    741   1544  eschrock 	/*
    742   1544  eschrock 	 * There's currently no mechanism for pre-reading the blocks that will
    743   1544  eschrock 	 * be to needed allocate a new object, so we accept the small chance
    744   1544  eschrock 	 * that there will be an i/o error and we will fail one of the
    745   1544  eschrock 	 * assertions below.
    746   1544  eschrock 	 */
    747    789    ahrens 	if (vap->va_type == VDIR) {
    748    789    ahrens 		if (flag & IS_REPLAY) {
    749   5446    ahrens 			err = zap_create_claim_norm(zfsvfs->z_os, obj,
    750   5331       amw 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
    751    789    ahrens 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
    752    789    ahrens 			ASSERT3U(err, ==, 0);
    753    789    ahrens 		} else {
    754   5446    ahrens 			obj = zap_create_norm(zfsvfs->z_os,
    755   5331       amw 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
    756    789    ahrens 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
    757    789    ahrens 		}
    758    789    ahrens 	} else {
    759    789    ahrens 		if (flag & IS_REPLAY) {
    760   5446    ahrens 			err = dmu_object_claim(zfsvfs->z_os, obj,
    761    789    ahrens 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
    762    789    ahrens 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
    763    789    ahrens 			ASSERT3U(err, ==, 0);
    764    789    ahrens 		} else {
    765   5446    ahrens 			obj = dmu_object_alloc(zfsvfs->z_os,
    766    789    ahrens 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
    767    789    ahrens 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
    768    789    ahrens 		}
    769    789    ahrens 	}
    770  10938      Mark 
    771  10938      Mark 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
    772   5446    ahrens 	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db));
    773   5446    ahrens 	dmu_buf_will_dirty(db, tx);
    774    789    ahrens 
    775    789    ahrens 	/*
    776    789    ahrens 	 * Initialize the znode physical data to zero.
    777    789    ahrens 	 */
    778   5446    ahrens 	ASSERT(db->db_size >= sizeof (znode_phys_t));
    779   5446    ahrens 	bzero(db->db_data, db->db_size);
    780   5446    ahrens 	pzp = db->db_data;
    781    789    ahrens 
    782    789    ahrens 	/*
    783    789    ahrens 	 * If this is the root, fix up the half-initialized parent pointer
    784    789    ahrens 	 * to reference the just-allocated physical data area.
    785    789    ahrens 	 */
    786    789    ahrens 	if (flag & IS_ROOT_NODE) {
    787   5642    maybee 		dzp->z_dbuf = db;
    788    789    ahrens 		dzp->z_phys = pzp;
    789   5446    ahrens 		dzp->z_id = obj;
    790    789    ahrens 	}
    791    789    ahrens 
    792    789    ahrens 	/*
    793    789    ahrens 	 * If parent is an xattr, so am I.
    794    789    ahrens 	 */
    795    789    ahrens 	if (dzp->z_phys->zp_flags & ZFS_XATTR)
    796    789    ahrens 		flag |= IS_XATTR;
    797    789    ahrens 
    798    789    ahrens 	if (vap->va_type == VBLK || vap->va_type == VCHR) {
    799   1816     marks 		pzp->zp_rdev = zfs_expldev(vap->va_rdev);
    800    789    ahrens 	}
    801    789    ahrens 
    802   5331       amw 	if (zfsvfs->z_use_fuids)
    803   5331       amw 		pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
    804   5331       amw 
    805    789    ahrens 	if (vap->va_type == VDIR) {
    806    789    ahrens 		pzp->zp_size = 2;		/* contents ("." and "..") */
    807    789    ahrens 		pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
    808    789    ahrens 	}
    809    789    ahrens 
    810    789    ahrens 	pzp->zp_parent = dzp->z_id;
    811    789    ahrens 	if (flag & IS_XATTR)
    812    789    ahrens 		pzp->zp_flags |= ZFS_XATTR;
    813    789    ahrens 
    814    789    ahrens 	pzp->zp_gen = gen;
    815    789    ahrens 
    816    789    ahrens 	ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
    817    789    ahrens 	ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
    818    789    ahrens 
    819    789    ahrens 	if (vap->va_mask & AT_ATIME) {
    820    789    ahrens 		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
    821    789    ahrens 	} else {
    822    789    ahrens 		ZFS_TIME_ENCODE(&now, pzp->zp_atime);
    823    789    ahrens 	}
    824    789    ahrens 
    825    789    ahrens 	if (vap->va_mask & AT_MTIME) {
    826    789    ahrens 		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
    827    789    ahrens 	} else {
    828    789    ahrens 		ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
    829    789    ahrens 	}
    830  10938      Mark 	pzp->zp_uid = acl_ids->z_fuid;
    831  10938      Mark 	pzp->zp_gid = acl_ids->z_fgid;
    832  10938      Mark 	pzp->zp_mode = acl_ids->z_mode;
    833   5642    maybee 	if (!(flag & IS_ROOT_NODE)) {
    834   5642    maybee 		*zpp = zfs_znode_alloc(zfsvfs, db, 0);
    835   5642    maybee 	} else {
    836   5642    maybee 		/*
    837   5642    maybee 		 * If we are creating the root node, the "parent" we
    838   5642    maybee 		 * passed in is the znode for the root.
    839   5642    maybee 		 */
    840   5642    maybee 		*zpp = dzp;
    841   5642    maybee 	}
    842   9179      Mark 	VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
    843   9179      Mark 	if (vap->va_mask & AT_XVATTR)
    844   9179      Mark 		zfs_xvattr_set(*zpp, (xvattr_t *)vap);
    845  10938      Mark 
    846  10938      Mark 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
    847   5331       amw }
    848   5331       amw 
    849   5331       amw void
    850   5331       amw zfs_xvattr_set(znode_t *zp, xvattr_t *xvap)
    851   5331       amw {
    852   5331       amw 	xoptattr_t *xoap;
    853   5331       amw 
    854   5331       amw 	xoap = xva_getxoptattr(xvap);
    855   5331       amw 	ASSERT(xoap);
    856   5331       amw 
    857   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
    858   5331       amw 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime);
    859   5331       amw 		XVA_SET_RTN(xvap, XAT_CREATETIME);
    860   5331       amw 	}
    861   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
    862   5331       amw 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly);
    863   5331       amw 		XVA_SET_RTN(xvap, XAT_READONLY);
    864   5331       amw 	}
    865   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
    866   5331       amw 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden);
    867   5331       amw 		XVA_SET_RTN(xvap, XAT_HIDDEN);
    868   5331       amw 	}
    869   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
    870   5331       amw 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system);
    871   5331       amw 		XVA_SET_RTN(xvap, XAT_SYSTEM);
    872   5331       amw 	}
    873   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
    874   5331       amw 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive);
    875   5331       amw 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
    876   5331       amw 	}
    877   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
    878   5331       amw 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable);
    879   5331       amw 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
    880   5331       amw 	}
    881   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
    882   5331       amw 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink);
    883   5331       amw 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
    884   5331       amw 	}
    885   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
    886   5331       amw 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly);
    887   5331       amw 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
    888   5331       amw 	}
    889   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
    890   5331       amw 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump);
    891   5331       amw 		XVA_SET_RTN(xvap, XAT_NODUMP);
    892   5331       amw 	}
    893   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
    894   5331       amw 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque);
    895   5331       amw 		XVA_SET_RTN(xvap, XAT_OPAQUE);
    896   5331       amw 	}
    897   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
    898   5331       amw 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
    899   5331       amw 		    xoap->xoa_av_quarantined);
    900   5331       amw 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
    901   5331       amw 	}
    902   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
    903   5331       amw 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified);
    904   5331       amw 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
    905   5331       amw 	}
    906   5331       amw 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
    907   5331       amw 		(void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp,
    908   5331       amw 		    sizeof (xoap->xoa_av_scanstamp));
    909   5331       amw 		zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP;
    910   5331       amw 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
    911    789    ahrens 	}
    912  10793       dai 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
    913  10793       dai 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse);
    914  10793       dai 		XVA_SET_RTN(xvap, XAT_REPARSE);
    915  10793       dai 	}
    916    789    ahrens }
    917    789    ahrens 
    918    789    ahrens int
    919    789    ahrens zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
    920    789    ahrens {
    921    789    ahrens 	dmu_object_info_t doi;
    922    789    ahrens 	dmu_buf_t	*db;
    923    789    ahrens 	znode_t		*zp;
    924   1544  eschrock 	int err;
    925    789    ahrens 
    926    789    ahrens 	*zpp = NULL;
    927    789    ahrens 
    928    789    ahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
    929    789    ahrens 
    930   1544  eschrock 	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
    931   1544  eschrock 	if (err) {
    932    789    ahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
    933   1544  eschrock 		return (err);
    934    789    ahrens 	}
    935    789    ahrens 
    936    789    ahrens 	dmu_object_info_from_db(db, &doi);
    937    789    ahrens 	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
    938    789    ahrens 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
    939   1544  eschrock 		dmu_buf_rele(db, NULL);
    940    789    ahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
    941    789    ahrens 		return (EINVAL);
    942    789    ahrens 	}
    943    789    ahrens 
    944    789    ahrens 	zp = dmu_buf_get_user(db);
    945    789    ahrens 	if (zp != NULL) {
    946    789    ahrens 		mutex_enter(&zp->z_lock);
    947    789    ahrens 
    948   5446    ahrens 		/*
    949   5446    ahrens 		 * Since we do immediate eviction of the z_dbuf, we
    950   5446    ahrens 		 * should never find a dbuf with a znode that doesn't
    951   5446    ahrens 		 * know about the dbuf.
    952   5446    ahrens 		 */
    953   5446    ahrens 		ASSERT3P(zp->z_dbuf, ==, db);
    954    789    ahrens 		ASSERT3U(zp->z_id, ==, obj_num);
    955   3461    ahrens 		if (zp->z_unlinked) {
    956   5446    ahrens 			err = ENOENT;
    957    789    ahrens 		} else {
    958   5446    ahrens 			VN_HOLD(ZTOV(zp));
    959   5446    ahrens 			*zpp = zp;
    960   5446    ahrens 			err = 0;
    961    789    ahrens 		}
    962   5446    ahrens 		dmu_buf_rele(db, NULL);
    963    789    ahrens 		mutex_exit(&zp->z_lock);
    964   1544  eschrock 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
    965   5446    ahrens 		return (err);
    966    789    ahrens 	}
    967    789    ahrens 
    968    789    ahrens 	/*
    969    789    ahrens 	 * Not found create new znode/vnode
    970  10938      Mark 	 * but only if file exists.
    971  10938      Mark 	 *
    972  10938      Mark 	 * There is a small window where zfs_vget() could
    973  10938      Mark 	 * find this object while a file create is still in
    974  10938      Mark 	 * progress.  Since a gen number can never be zero
    975  10938      Mark 	 * we will check that to determine if its an allocated
    976  10938      Mark 	 * file.
    977    789    ahrens 	 */
    978  10938      Mark 
    979  10938      Mark 	if (((znode_phys_t *)db->db_data)->zp_gen != 0) {
    980  10938      Mark 		zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
    981  10938      Mark 		*zpp = zp;
    982  10938      Mark 		err = 0;
    983  10938      Mark 	} else {
    984  10938      Mark 		dmu_buf_rele(db, NULL);
    985  10938      Mark 		err = ENOENT;
    986  10938      Mark 	}
    987   1544  eschrock 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
    988  10938      Mark 	return (err);
    989   5326  ek110237 }
    990   5326  ek110237 
    991   5326  ek110237 int
    992   5326  ek110237 zfs_rezget(znode_t *zp)
    993   5326  ek110237 {
    994   5326  ek110237 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    995   5326  ek110237 	dmu_object_info_t doi;
    996   5326  ek110237 	dmu_buf_t *db;
    997   5326  ek110237 	uint64_t obj_num = zp->z_id;
    998   5326  ek110237 	int err;
    999   5326  ek110237 
   1000   5326  ek110237 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
   1001   5326  ek110237 
   1002   5326  ek110237 	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
   1003   5326  ek110237 	if (err) {
   1004   5326  ek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
   1005   5326  ek110237 		return (err);
   1006   5326  ek110237 	}
   1007   5326  ek110237 
   1008   5326  ek110237 	dmu_object_info_from_db(db, &doi);
   1009   5326  ek110237 	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
   1010   5326  ek110237 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
   1011   5326  ek110237 		dmu_buf_rele(db, NULL);
   1012   5326  ek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
   1013   5326  ek110237 		return (EINVAL);
   1014   5326  ek110237 	}
   1015   5326  ek110237 
   1016   5326  ek110237 	if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) {
   1017   5326  ek110237 		dmu_buf_rele(db, NULL);
   1018   5326  ek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
   1019   5326  ek110237 		return (EIO);
   1020   5326  ek110237 	}
   1021  10269      Mark 
   1022  10269      Mark 	mutex_enter(&zp->z_acl_lock);
   1023  10269      Mark 	if (zp->z_acl_cached) {
   1024  10269      Mark 		zfs_acl_free(zp->z_acl_cached);
   1025  10269      Mark 		zp->z_acl_cached = NULL;
   1026  10269      Mark 	}
   1027  10269      Mark 	mutex_exit(&zp->z_acl_lock);
   1028   5326  ek110237 
   1029   6712     tomee 	zfs_znode_dmu_init(zfsvfs, zp, db);
   1030   5326  ek110237 	zp->z_unlinked = (zp->z_phys->zp_links == 0);
   1031   5844  ek110237 	zp->z_blksz = doi.doi_data_block_size;
   1032   5326  ek110237 
   1033   5326  ek110237 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
   1034   5326  ek110237 
   1035    789    ahrens 	return (0);
   1036    789    ahrens }
   1037    789    ahrens 
   1038    789    ahrens void
   1039    789    ahrens zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
   1040    789    ahrens {
   1041    789    ahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1042   6992    maybee 	objset_t *os = zfsvfs->z_os;
   1043   5446    ahrens 	uint64_t obj = zp->z_id;
   1044   6992    maybee 	uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
   1045    789    ahrens 
   1046   5446    ahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
   1047   6992    maybee 	if (acl_obj)
   1048   6992    maybee 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
   1049   6992    maybee 	VERIFY(0 == dmu_object_free(os, obj, tx));
   1050   5446    ahrens 	zfs_znode_dmu_fini(zp);
   1051   5446    ahrens 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
   1052   5642    maybee 	zfs_znode_free(zp);
   1053    789    ahrens }
   1054    789    ahrens 
   1055    789    ahrens void
   1056    789    ahrens zfs_zinactive(znode_t *zp)
   1057    789    ahrens {
   1058    789    ahrens 	vnode_t	*vp = ZTOV(zp);
   1059    789    ahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1060    789    ahrens 	uint64_t z_id = zp->z_id;
   1061    789    ahrens 
   1062   5446    ahrens 	ASSERT(zp->z_dbuf && zp->z_phys);
   1063    789    ahrens 
   1064    789    ahrens 	/*
   1065    789    ahrens 	 * Don't allow a zfs_zget() while were trying to release this znode
   1066    789    ahrens 	 */
   1067    789    ahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
   1068    789    ahrens 
   1069    789    ahrens 	mutex_enter(&zp->z_lock);
   1070    789    ahrens 	mutex_enter(&vp->v_lock);
   1071    789    ahrens 	vp->v_count--;
   1072    789    ahrens 	if (vp->v_count > 0 || vn_has_cached_data(vp)) {
   1073    789    ahrens 		/*
   1074    789    ahrens 		 * If the hold count is greater than zero, somebody has
   1075    789    ahrens 		 * obtained a new reference on this znode while we were
   1076    789    ahrens 		 * processing it here, so we are done.  If we still have
   1077    789    ahrens 		 * mapped pages then we are also done, since we don't
   1078    789    ahrens 		 * want to inactivate the znode until the pages get pushed.
   1079    789    ahrens 		 *
   1080    789    ahrens 		 * XXX - if vn_has_cached_data(vp) is true, but count == 0,
   1081    789    ahrens 		 * this seems like it would leave the znode hanging with
   1082    789    ahrens 		 * no chance to go inactive...
   1083    789    ahrens 		 */
   1084    789    ahrens 		mutex_exit(&vp->v_lock);
   1085    789    ahrens 		mutex_exit(&zp->z_lock);
   1086    789    ahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
   1087    789    ahrens 		return;
   1088    789    ahrens 	}
   1089    789    ahrens 	mutex_exit(&vp->v_lock);
   1090    789    ahrens 
   1091    789    ahrens 	/*
   1092    789    ahrens 	 * If this was the last reference to a file with no links,
   1093    789    ahrens 	 * remove the file from the file system.
   1094    789    ahrens 	 */
   1095   3461    ahrens 	if (zp->z_unlinked) {
   1096    789    ahrens 		mutex_exit(&zp->z_lock);
   1097    789    ahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
   1098   3461    ahrens 		zfs_rmnode(zp);
   1099    789    ahrens 		return;
   1100    789    ahrens 	}
   1101    789    ahrens 	mutex_exit(&zp->z_lock);
   1102   5446    ahrens 	zfs_znode_dmu_fini(zp);
   1103    789    ahrens 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
   1104   5642    maybee 	zfs_znode_free(zp);
   1105    789    ahrens }
   1106    789    ahrens 
   1107    789    ahrens void
   1108    789    ahrens zfs_znode_free(znode_t *zp)
   1109    789    ahrens {
   1110    789    ahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1111    789    ahrens 
   1112   5642    maybee 	vn_invalid(ZTOV(zp));
   1113   5642    maybee 
   1114   6712     tomee 	ASSERT(ZTOV(zp)->v_count == 0);
   1115   6712     tomee 
   1116    789    ahrens 	mutex_enter(&zfsvfs->z_znodes_lock);
   1117   6712     tomee 	POINTER_INVALIDATE(&zp->z_zfsvfs);
   1118    789    ahrens 	list_remove(&zfsvfs->z_all_znodes, zp);
   1119    789    ahrens 	mutex_exit(&zfsvfs->z_znodes_lock);
   1120    789    ahrens 
   1121   9981       Tim 	if (zp->z_acl_cached) {
   1122   9981       Tim 		zfs_acl_free(zp->z_acl_cached);
   1123   9981       Tim 		zp->z_acl_cached = NULL;
   1124   9981       Tim 	}
   1125   9981       Tim 
   1126    789    ahrens 	kmem_cache_free(znode_cache, zp);
   1127   5642    maybee 
   1128   5642    maybee 	VFS_RELE(zfsvfs->z_vfs);
   1129    789    ahrens }
   1130    789    ahrens 
   1131    789    ahrens void
   1132    789    ahrens zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx)
   1133    789    ahrens {
   1134    789    ahrens 	timestruc_t	now;
   1135    789    ahrens 
   1136    789    ahrens 	ASSERT(MUTEX_HELD(&zp->z_lock));
   1137    789    ahrens 
   1138    789    ahrens 	gethrestime(&now);
   1139    789    ahrens 
   1140    789    ahrens 	if (tx) {
   1141    789    ahrens 		dmu_buf_will_dirty(zp->z_dbuf, tx);
   1142    789    ahrens 		zp->z_atime_dirty = 0;
   1143    789    ahrens 		zp->z_seq++;
   1144    789    ahrens 	} else {
   1145    789    ahrens 		zp->z_atime_dirty = 1;
   1146    789    ahrens 	}
   1147    789    ahrens 
   1148    789    ahrens 	if (flag & AT_ATIME)
   1149    789    ahrens 		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime);
   1150    789    ahrens 
   1151   5331       amw 	if (flag & AT_MTIME) {
   1152    789    ahrens 		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime);
   1153   5331       amw 		if (zp->z_zfsvfs->z_use_fuids)
   1154   5331       amw 			zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
   1155   5331       amw 	}
   1156    789    ahrens 
   1157   5331       amw 	if (flag & AT_CTIME) {
   1158    789    ahrens 		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime);
   1159   5331       amw 		if (zp->z_zfsvfs->z_use_fuids)
   1160   5331       amw 			zp->z_phys->zp_flags |= ZFS_ARCHIVE;
   1161   5331       amw 	}
   1162    789    ahrens }
   1163    789    ahrens 
   1164    789    ahrens /*
   1165    789    ahrens  * Update the requested znode timestamps with the current time.
   1166    789    ahrens  * If we are in a transaction, then go ahead and mark the znode
   1167    789    ahrens  * dirty in the transaction so the timestamps will go to disk.
   1168    789    ahrens  * Otherwise, we will get pushed next time the znode is updated
   1169    789    ahrens  * in a transaction, or when this znode eventually goes inactive.
   1170    789    ahrens  *
   1171    789    ahrens  * Why is this OK?
   1172    789    ahrens  *  1 - Only the ACCESS time is ever updated outside of a transaction.
   1173    789    ahrens  *  2 - Multiple consecutive updates will be collapsed into a single
   1174    789    ahrens  *	znode update by the transaction grouping semantics of the DMU.
   1175    789    ahrens  */
   1176    789    ahrens void
   1177    789    ahrens zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx)
   1178    789    ahrens {
   1179    789    ahrens 	mutex_enter(&zp->z_lock);
   1180    789    ahrens 	zfs_time_stamper_locked(zp, flag, tx);
   1181    789    ahrens 	mutex_exit(&zp->z_lock);
   1182    789    ahrens }
   1183    789    ahrens 
   1184    789    ahrens /*
   1185   1669    perrin  * Grow the block size for a file.
   1186    789    ahrens  *
   1187    789    ahrens  *	IN:	zp	- znode of file to free data in.
   1188    789    ahrens  *		size	- requested block size
   1189    789    ahrens  *		tx	- open transaction.
   1190    789    ahrens  *
   1191    789    ahrens  * NOTE: this function assumes that the znode is write locked.
   1192    789    ahrens  */
   1193   1669    perrin void
   1194    789    ahrens zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
   1195    789    ahrens {
   1196    789    ahrens 	int		error;
   1197    789    ahrens 	u_longlong_t	dummy;
   1198    789    ahrens 
   1199    789    ahrens 	if (size <= zp->z_blksz)
   1200   1669    perrin 		return;
   1201    789    ahrens 	/*
   1202    789    ahrens 	 * If the file size is already greater than the current blocksize,
   1203    789    ahrens 	 * we will not grow.  If there is more than one block in a file,
   1204    789    ahrens 	 * the blocksize cannot change.
   1205    789    ahrens 	 */
   1206    789    ahrens 	if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz)
   1207   1669    perrin 		return;
   1208    789    ahrens 
   1209    789    ahrens 	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
   1210    789    ahrens 	    size, 0, tx);
   1211    789    ahrens 	if (error == ENOTSUP)
   1212   1669    perrin 		return;
   1213    789    ahrens 	ASSERT3U(error, ==, 0);
   1214    789    ahrens 
   1215    789    ahrens 	/* What blocksize did we actually get? */
   1216    789    ahrens 	dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy);
   1217    789    ahrens }
   1218    789    ahrens 
   1219    789    ahrens /*
   1220    789    ahrens  * This is a dummy interface used when pvn_vplist_dirty() should *not*
   1221    789    ahrens  * be calling back into the fs for a putpage().  E.g.: when truncating
   1222    789    ahrens  * a file, the pages being "thrown away* don't need to be written out.
   1223    789    ahrens  */
   1224    789    ahrens /* ARGSUSED */
   1225    789    ahrens static int
   1226    789    ahrens zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
   1227    789    ahrens     int flags, cred_t *cr)
   1228    789    ahrens {
   1229    789    ahrens 	ASSERT(0);
   1230    789    ahrens 	return (0);
   1231    789    ahrens }
   1232    789    ahrens 
   1233    789    ahrens /*
   1234   6992    maybee  * Increase the file length
   1235    789    ahrens  *
   1236    789    ahrens  *	IN:	zp	- znode of file to free data in.
   1237   6992    maybee  *		end	- new end-of-file
   1238    789    ahrens  *
   1239    789    ahrens  * 	RETURN:	0 if success
   1240    789    ahrens  *		error code if failure
   1241    789    ahrens  */
   1242   6992    maybee static int
   1243   6992    maybee zfs_extend(znode_t *zp, uint64_t end)
   1244    789    ahrens {
   1245   6992    maybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1246   1878    maybee 	dmu_tx_t *tx;
   1247   1878    maybee 	rl_t *rl;
   1248   6992    maybee 	uint64_t newblksz;
   1249   1669    perrin 	int error;
   1250   5331       amw 
   1251    789    ahrens 	/*
   1252   6992    maybee 	 * We will change zp_size, lock the whole file.
   1253   1878    maybee 	 */
   1254   6992    maybee 	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
   1255   1878    maybee 
   1256   1878    maybee 	/*
   1257    789    ahrens 	 * Nothing to do if file already at desired length.
   1258    789    ahrens 	 */
   1259   6992    maybee 	if (end <= zp->z_phys->zp_size) {
   1260   2237    maybee 		zfs_range_unlock(rl);
   1261    789    ahrens 		return (0);
   1262    789    ahrens 	}
   1263   6992    maybee top:
   1264   1878    maybee 	tx = dmu_tx_create(zfsvfs->z_os);
   1265   1878    maybee 	dmu_tx_hold_bonus(tx, zp->z_id);
   1266   6992    maybee 	if (end > zp->z_blksz &&
   1267   1878    maybee 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
   1268    789    ahrens 		/*
   1269    789    ahrens 		 * We are growing the file past the current block size.
   1270    789    ahrens 		 */
   1271    789    ahrens 		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
   1272    789    ahrens 			ASSERT(!ISP2(zp->z_blksz));
   1273   6992    maybee 			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
   1274    789    ahrens 		} else {
   1275   6992    maybee 			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
   1276    789    ahrens 		}
   1277   6992    maybee 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
   1278   6992    maybee 	} else {
   1279   6992    maybee 		newblksz = 0;
   1280   1878    maybee 	}
   1281   1878    maybee 
   1282   8227      Neil 	error = dmu_tx_assign(tx, TXG_NOWAIT);
   1283   1878    maybee 	if (error) {
   1284   8227      Neil 		if (error == ERESTART) {
   1285   2113    ahrens 			dmu_tx_wait(tx);
   1286   6992    maybee 			dmu_tx_abort(tx);
   1287   6992    maybee 			goto top;
   1288   6992    maybee 		}
   1289   1878    maybee 		dmu_tx_abort(tx);
   1290   2237    maybee 		zfs_range_unlock(rl);
   1291   1878    maybee 		return (error);
   1292   1878    maybee 	}
   1293   6992    maybee 	dmu_buf_will_dirty(zp->z_dbuf, tx);
   1294   1878    maybee 
   1295   6992    maybee 	if (newblksz)
   1296   6992    maybee 		zfs_grow_blocksize(zp, newblksz, tx);
   1297   1878    maybee 
   1298   6992    maybee 	zp->z_phys->zp_size = end;
   1299   1878    maybee 
   1300   2237    maybee 	zfs_range_unlock(rl);
   1301   1878    maybee 
   1302   1878    maybee 	dmu_tx_commit(tx);
   1303   6992    maybee 
   1304   6992    maybee 	return (0);
   1305   6992    maybee }
   1306   6992    maybee 
   1307   6992    maybee /*
   1308   6992    maybee  * Free space in a file.
   1309   6992    maybee  *
   1310   6992    maybee  *	IN:	zp	- znode of file to free data in.
   1311   6992    maybee  *		off	- start of section to free.
   1312   6992    maybee  *		len	- length of section to free.
   1313   6992    maybee  *
   1314   6992    maybee  * 	RETURN:	0 if success
   1315   6992    maybee  *		error code if failure
   1316   6992    maybee  */
   1317   6992    maybee static int
   1318   6992    maybee zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
   1319   6992    maybee {
   1320   6992    maybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1321   6992    maybee 	rl_t *rl;
   1322   6992    maybee 	int error;
   1323   6992    maybee 
   1324   6992    maybee 	/*
   1325   6992    maybee 	 * Lock the range being freed.
   1326   6992    maybee 	 */
   1327   6992    maybee 	rl = zfs_range_lock(zp, off, len, RL_WRITER);
   1328   6992    maybee 
   1329   6992    maybee 	/*
   1330   6992    maybee 	 * Nothing to do if file already at desired length.
   1331   6992    maybee 	 */
   1332   6992    maybee 	if (off >= zp->z_phys->zp_size) {
   1333   6992    maybee 		zfs_range_unlock(rl);
   1334   6992    maybee 		return (0);
   1335   6992    maybee 	}
   1336   6992    maybee 
   1337   6992    maybee 	if (off + len > zp->z_phys->zp_size)
   1338   6992    maybee 		len = zp->z_phys->zp_size - off;
   1339   6992    maybee 
   1340   6992    maybee 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
   1341   6992    maybee 
   1342   6992    maybee 	zfs_range_unlock(rl);
   1343   6992    maybee 
   1344   6992    maybee 	return (error);
   1345   6992    maybee }
   1346   6992    maybee 
   1347   6992    maybee /*
   1348   6992    maybee  * Truncate a file
   1349   6992    maybee  *
   1350   6992    maybee  *	IN:	zp	- znode of file to free data in.
   1351   6992    maybee  *		end	- new end-of-file.
   1352   6992    maybee  *
   1353   6992    maybee  * 	RETURN:	0 if success
   1354   6992    maybee  *		error code if failure
   1355   6992    maybee  */
   1356   6992    maybee static int
   1357   6992    maybee zfs_trunc(znode_t *zp, uint64_t end)
   1358   6992    maybee {
   1359   6992    maybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1360   6992    maybee 	vnode_t *vp = ZTOV(zp);
   1361   6992    maybee 	dmu_tx_t *tx;
   1362   6992    maybee 	rl_t *rl;
   1363   6992    maybee 	int error;
   1364   6992    maybee 
   1365   6992    maybee 	/*
   1366   6992    maybee 	 * We will change zp_size, lock the whole file.
   1367   6992    maybee 	 */
   1368   6992    maybee 	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
   1369   6992    maybee 
   1370   6992    maybee 	/*
   1371   6992    maybee 	 * Nothing to do if file already at desired length.
   1372   6992    maybee 	 */
   1373   6992    maybee 	if (end >= zp->z_phys->zp_size) {
   1374   6992    maybee 		zfs_range_unlock(rl);
   1375   6992    maybee 		return (0);
   1376   6992    maybee 	}
   1377   6992    maybee 
   1378   6992    maybee 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
   1379   6992    maybee 	if (error) {
   1380   6992    maybee 		zfs_range_unlock(rl);
   1381   6992    maybee 		return (error);
   1382   6992    maybee 	}
   1383   6992    maybee top:
   1384   6992    maybee 	tx = dmu_tx_create(zfsvfs->z_os);
   1385   6992    maybee 	dmu_tx_hold_bonus(tx, zp->z_id);
   1386   8227      Neil 	error = dmu_tx_assign(tx, TXG_NOWAIT);
   1387   6992    maybee 	if (error) {
   1388   8227      Neil 		if (error == ERESTART) {
   1389   6992    maybee 			dmu_tx_wait(tx);
   1390   6992    maybee 			dmu_tx_abort(tx);
   1391   6992    maybee 			goto top;
   1392   6992    maybee 		}
   1393   6992    maybee 		dmu_tx_abort(tx);
   1394   6992    maybee 		zfs_range_unlock(rl);
   1395   6992    maybee 		return (error);
   1396   6992    maybee 	}
   1397   6992    maybee 	dmu_buf_will_dirty(zp->z_dbuf, tx);
   1398   6992    maybee 
   1399   6992    maybee 	zp->z_phys->zp_size = end;
   1400   6992    maybee 
   1401   6992    maybee 	dmu_tx_commit(tx);
   1402   6992    maybee 
   1403    789    ahrens 	/*
   1404   1878    maybee 	 * Clear any mapped pages in the truncated region.  This has to
   1405   1878    maybee 	 * happen outside of the transaction to avoid the possibility of
   1406   1878    maybee 	 * a deadlock with someone trying to push a page that we are
   1407   1878    maybee 	 * about to invalidate.
   1408    789    ahrens 	 */
   1409   6992    maybee 	if (vn_has_cached_data(vp)) {
   1410    789    ahrens 		page_t *pp;
   1411   6992    maybee 		uint64_t start = end & PAGEMASK;
   1412   6992    maybee 		int poff = end & PAGEOFFSET;
   1413    789    ahrens 
   1414   1878    maybee 		if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
   1415    789    ahrens 			/*
   1416    789    ahrens 			 * We need to zero a partial page.
   1417    789    ahrens 			 */
   1418   1878    maybee 			pagezero(pp, poff, PAGESIZE - poff);
   1419    789    ahrens 			start += PAGESIZE;
   1420    789    ahrens 			page_unlock(pp);
   1421    789    ahrens 		}
   1422    789    ahrens 		error = pvn_vplist_dirty(vp, start, zfs_no_putpage,
   1423   1878    maybee 		    B_INVAL | B_TRUNC, NULL);
   1424    789    ahrens 		ASSERT(error == 0);
   1425    789    ahrens 	}
   1426   8636      Mark 
   1427   8636      Mark 	zfs_range_unlock(rl);
   1428    789    ahrens 
   1429   6992    maybee 	return (0);
   1430   6992    maybee }
   1431   6992    maybee 
   1432   6992    maybee /*
   1433   6992    maybee  * Free space in a file
   1434   6992    maybee  *
   1435   6992    maybee  *	IN:	zp	- znode of file to free data in.
   1436   6992    maybee  *		off	- start of range
   1437   6992    maybee  *		len	- end of range (0 => EOF)
   1438   6992    maybee  *		flag	- current file open mode flags.
   1439   6992    maybee  *		log	- TRUE if this action should be logged
   1440   6992    maybee  *
   1441   6992    maybee  * 	RETURN:	0 if success
   1442   6992    maybee  *		error code if failure
   1443   6992    maybee  */
   1444   6992    maybee int
   1445   6992    maybee zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
   1446   6992    maybee {
   1447   6992    maybee 	vnode_t *vp = ZTOV(zp);
   1448   6992    maybee 	dmu_tx_t *tx;
   1449   6992    maybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1450   6992    maybee 	zilog_t *zilog = zfsvfs->z_log;
   1451   6992    maybee 	int error;
   1452   6992    maybee 
   1453   6992    maybee 	if (off > zp->z_phys->zp_size) {
   1454   6992    maybee 		error =  zfs_extend(zp, off+len);
   1455   6992    maybee 		if (error == 0 && log)
   1456   6992    maybee 			goto log;
   1457   6992    maybee 		else
   1458   6992    maybee 			return (error);
   1459   6992    maybee 	}
   1460   6992    maybee 
   1461   6992    maybee 	/*
   1462   6992    maybee 	 * Check for any locks in the region to be freed.
   1463   6992    maybee 	 */
   1464   6992    maybee 	if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) {
   1465   6992    maybee 		uint64_t length = (len ? len : zp->z_phys->zp_size - off);
   1466   6992    maybee 		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
   1467   6992    maybee 			return (error);
   1468   6992    maybee 	}
   1469   6992    maybee 
   1470   6992    maybee 	if (len == 0) {
   1471   6992    maybee 		error = zfs_trunc(zp, off);
   1472   6992    maybee 	} else {
   1473   6992    maybee 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
   1474   6992    maybee 		    off + len > zp->z_phys->zp_size)
   1475   6992    maybee 			error = zfs_extend(zp, off+len);
   1476   6992    maybee 	}
   1477   6992    maybee 	if (error || !log)
   1478   6992    maybee 		return (error);
   1479   6992    maybee log:
   1480   6992    maybee 	tx = dmu_tx_create(zfsvfs->z_os);
   1481   6992    maybee 	dmu_tx_hold_bonus(tx, zp->z_id);
   1482   8227      Neil 	error = dmu_tx_assign(tx, TXG_NOWAIT);
   1483   6992    maybee 	if (error) {
   1484   8227      Neil 		if (error == ERESTART) {
   1485   6992    maybee 			dmu_tx_wait(tx);
   1486   6992    maybee 			dmu_tx_abort(tx);
   1487   6992    maybee 			goto log;
   1488   6992    maybee 		}
   1489   6992    maybee 		dmu_tx_abort(tx);
   1490   6992    maybee 		return (error);
   1491   6992    maybee 	}
   1492   6992    maybee 
   1493   6992    maybee 	zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
   1494   6992    maybee 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
   1495   6992    maybee 
   1496   6992    maybee 	dmu_tx_commit(tx);
   1497    789    ahrens 	return (0);
   1498    789    ahrens }
   1499    789    ahrens 
   1500    789    ahrens void
   1501   5498      timh zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
   1502    789    ahrens {
   1503    789    ahrens 	zfsvfs_t	zfsvfs;
   1504   9396   Matthew 	uint64_t	moid, obj, version;
   1505   5498      timh 	uint64_t	sense = ZFS_CASE_SENSITIVE;
   1506   5498      timh 	uint64_t	norm = 0;
   1507   5498      timh 	nvpair_t	*elem;
   1508    789    ahrens 	int		error;
   1509  10938      Mark 	int		i;
   1510    789    ahrens 	znode_t		*rootzp = NULL;
   1511    789    ahrens 	vnode_t		*vp;
   1512    789    ahrens 	vattr_t		vattr;
   1513   5446    ahrens 	znode_t		*zp;
   1514   9179      Mark 	zfs_acl_ids_t	acl_ids;
   1515    789    ahrens 
   1516    789    ahrens 	/*
   1517    789    ahrens 	 * First attempt to create master node.
   1518   1544  eschrock 	 */
   1519   1544  eschrock 	/*
   1520   1544  eschrock 	 * In an empty objset, there are no blocks to read and thus
   1521   1544  eschrock 	 * there can be no i/o errors (which we assert below).
   1522    789    ahrens 	 */
   1523    789    ahrens 	moid = MASTER_NODE_OBJ;
   1524    789    ahrens 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
   1525    789    ahrens 	    DMU_OT_NONE, 0, tx);
   1526    789    ahrens 	ASSERT(error == 0);
   1527    789    ahrens 
   1528    789    ahrens 	/*
   1529    789    ahrens 	 * Set starting attributes.
   1530    789    ahrens 	 */
   1531   9396   Matthew 	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_USERSPACE)
   1532   7046    ahrens 		version = ZPL_VERSION;
   1533   9396   Matthew 	else if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
   1534   9396   Matthew 		version = ZPL_VERSION_USERSPACE - 1;
   1535   7046    ahrens 	else
   1536   7046    ahrens 		version = ZPL_VERSION_FUID - 1;
   1537   5498      timh 	elem = NULL;
   1538   5498      timh 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
   1539   5498      timh 		/* For the moment we expect all zpl props to be uint64_ts */
   1540   5498      timh 		uint64_t val;
   1541   5498      timh 		char *name;
   1542    789    ahrens 
   1543   5498      timh 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
   1544   5520      timh 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
   1545   5498      timh 		name = nvpair_name(elem);
   1546   5498      timh 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
   1547   9396   Matthew 			if (val < version)
   1548   9396   Matthew 				version = val;
   1549   5498      timh 		} else {
   1550   5498      timh 			error = zap_update(os, moid, name, 8, 1, &val, tx);
   1551   5498      timh 		}
   1552   5498      timh 		ASSERT(error == 0);
   1553   5498      timh 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
   1554   5498      timh 			norm = val;
   1555   5498      timh 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
   1556   5498      timh 			sense = val;
   1557   5498      timh 	}
   1558   5498      timh 	ASSERT(version != 0);
   1559   9396   Matthew 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
   1560    789    ahrens 
   1561    789    ahrens 	/*
   1562    789    ahrens 	 * Create a delete queue.
   1563    789    ahrens 	 */
   1564   9396   Matthew 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
   1565    789    ahrens 
   1566   9396   Matthew 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
   1567    789    ahrens 	ASSERT(error == 0);
   1568    789    ahrens 
   1569    789    ahrens 	/*
   1570    789    ahrens 	 * Create root znode.  Create minimal znode/vnode/zfsvfs
   1571    789    ahrens 	 * to allow zfs_mknode to work.
   1572    789    ahrens 	 */
   1573    789    ahrens 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
   1574    789    ahrens 	vattr.va_type = VDIR;
   1575    789    ahrens 	vattr.va_mode = S_IFDIR|0755;
   1576   4543     marks 	vattr.va_uid = crgetuid(cr);
   1577   4543     marks 	vattr.va_gid = crgetgid(cr);
   1578    789    ahrens 
   1579    789    ahrens 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
   1580   3461    ahrens 	rootzp->z_unlinked = 0;
   1581    789    ahrens 	rootzp->z_atime_dirty = 0;
   1582    789    ahrens 
   1583    789    ahrens 	vp = ZTOV(rootzp);
   1584    789    ahrens 	vn_reinit(vp);
   1585    789    ahrens 	vp->v_type = VDIR;
   1586    789    ahrens 
   1587    789    ahrens 	bzero(&zfsvfs, sizeof (zfsvfs_t));
   1588    789    ahrens 
   1589    789    ahrens 	zfsvfs.z_os = os;
   1590    789    ahrens 	zfsvfs.z_parent = &zfsvfs;
   1591   5331       amw 	zfsvfs.z_version = version;
   1592   5331       amw 	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
   1593   5331       amw 	zfsvfs.z_norm = norm;
   1594   5498      timh 	/*
   1595   5498      timh 	 * Fold case on file systems that are always or sometimes case
   1596   5498      timh 	 * insensitive.
   1597   5498      timh 	 */
   1598   5498      timh 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
   1599   5498      timh 		zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;
   1600    789    ahrens 
   1601    789    ahrens 	mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
   1602    789    ahrens 	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
   1603    789    ahrens 	    offsetof(znode_t, z_link_node));
   1604    789    ahrens 
   1605  10938      Mark 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
   1606  10938      Mark 		mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
   1607  10938      Mark 
   1608   6712     tomee 	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
   1609   6712     tomee 	rootzp->z_zfsvfs = &zfsvfs;
   1610   9179      Mark 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
   1611   9179      Mark 	    cr, NULL, &acl_ids));
   1612   9179      Mark 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, &acl_ids);
   1613   5642    maybee 	ASSERT3P(zp, ==, rootzp);
   1614   6712     tomee 	ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
   1615   5446    ahrens 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
   1616    789    ahrens 	ASSERT(error == 0);
   1617   9179      Mark 	zfs_acl_ids_free(&acl_ids);
   1618   6712     tomee 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
   1619    789    ahrens 
   1620    789    ahrens 	ZTOV(rootzp)->v_count = 0;
   1621   5642    maybee 	dmu_buf_rele(rootzp->z_dbuf, NULL);
   1622   5642    maybee 	rootzp->z_dbuf = NULL;
   1623    789    ahrens 	kmem_cache_free(znode_cache, rootzp);
   1624   8845       amw 
   1625   8845       amw 	/*
   1626   8845       amw 	 * Create shares directory
   1627   8845       amw 	 */
   1628   8845       amw 
   1629   8845       amw 	error = zfs_create_share_dir(&zfsvfs, tx);
   1630   9179      Mark 
   1631   8845       amw 	ASSERT(error == 0);
   1632  10938      Mark 
   1633  10938      Mark 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
   1634  10938      Mark 		mutex_destroy(&zfsvfs.z_hold_mtx[i]);
   1635    789    ahrens }
   1636   5331       amw 
   1637   3444  ek110237 #endif /* _KERNEL */
   1638   3444  ek110237 /*
   1639   3444  ek110237  * Given an object number, return its parent object number and whether
   1640   3444  ek110237  * or not the object is an extended attribute directory.
   1641   3444  ek110237  */
   1642   3444  ek110237 static int
   1643   3444  ek110237 zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir)
   1644   3444  ek110237 {
   1645   3444  ek110237 	dmu_buf_t *db;
   1646   3444  ek110237 	dmu_object_info_t doi;
   1647   3444  ek110237 	znode_phys_t *zp;
   1648   3444  ek110237 	int error;
   1649   3444  ek110237 
   1650   3444  ek110237 	if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0)
   1651   3444  ek110237 		return (error);
   1652   3444  ek110237 
   1653   3444  ek110237 	dmu_object_info_from_db(db, &doi);
   1654   3444  ek110237 	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
   1655   3444  ek110237 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
   1656   3444  ek110237 		dmu_buf_rele(db, FTAG);
   1657   3444  ek110237 		return (EINVAL);
   1658   3444  ek110237 	}
   1659   3444  ek110237 
   1660   3444  ek110237 	zp = db->db_data;
   1661   3444  ek110237 	*pobjp = zp->zp_parent;
   1662   3444  ek110237 	*is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) &&
   1663   3444  ek110237 	    S_ISDIR(zp->zp_mode);
   1664   3444  ek110237 	dmu_buf_rele(db, FTAG);
   1665   3444  ek110237 
   1666   3444  ek110237 	return (0);
   1667   3444  ek110237 }
   1668   3444  ek110237 
   1669   3444  ek110237 int
   1670   3444  ek110237 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
   1671   3444  ek110237 {
   1672   3444  ek110237 	char *path = buf + len - 1;
   1673   3444  ek110237 	int error;
   1674   3444  ek110237 
   1675   3444  ek110237 	*path = '\0';
   1676   3444  ek110237 
   1677   3444  ek110237 	for (;;) {
   1678   3444  ek110237 		uint64_t pobj;
   1679   3444  ek110237 		char component[MAXNAMELEN + 2];
   1680   3444  ek110237 		size_t complen;
   1681   3444  ek110237 		int is_xattrdir;
   1682   3444  ek110237 
   1683   3444  ek110237 		if ((error = zfs_obj_to_pobj(osp, obj, &pobj,
   1684   3444  ek110237 		    &is_xattrdir)) != 0)
   1685   3444  ek110237 			break;
   1686   3444  ek110237 
   1687   3444  ek110237 		if (pobj == obj) {
   1688   3444  ek110237 			if (path[0] != '/')
   1689   3444  ek110237 				*--path = '/';
   1690   3444  ek110237 			break;
   1691   3444  ek110237 		}
   1692   3444  ek110237 
   1693   3444  ek110237 		component[0] = '/';
   1694   3444  ek110237 		if (is_xattrdir) {
   1695   3444  ek110237 			(void) sprintf(component + 1, "<xattrdir>");
   1696   3444  ek110237 		} else {
   1697   4577    ahrens 			error = zap_value_search(osp, pobj, obj,
   1698   4577    ahrens 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
   1699   3444  ek110237 			if (error != 0)
   1700   3444  ek110237 				break;
   1701   3444  ek110237 		}
   1702   3444  ek110237 
   1703   3444  ek110237 		complen = strlen(component);
   1704   3444  ek110237 		path -= complen;
   1705   3444  ek110237 		ASSERT(path >= buf);
   1706   3444  ek110237 		bcopy(component, path, complen);
   1707   3444  ek110237 		obj = pobj;
   1708   3444  ek110237 	}
   1709   3444  ek110237 
   1710   3444  ek110237 	if (error == 0)
   1711   3444  ek110237 		(void) memmove(buf, path, buf + len - path);
   1712   3444  ek110237 	return (error);
   1713   3444  ek110237 }
   1714