Home | History | Annotate | Download | only in zfs
      1    789     ahrens /*
      2    789     ahrens  * CDDL HEADER START
      3    789     ahrens  *
      4    789     ahrens  * The contents of this file are subject to the terms of the
      5   1544   eschrock  * Common Development and Distribution License (the "License").
      6   1544   eschrock  * You may not use this file except in compliance with the License.
      7    789     ahrens  *
      8    789     ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789     ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789     ahrens  * See the License for the specific language governing permissions
     11    789     ahrens  * and limitations under the License.
     12    789     ahrens  *
     13    789     ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789     ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789     ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789     ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789     ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789     ahrens  *
     19    789     ahrens  * CDDL HEADER END
     20    789     ahrens  */
     21    789     ahrens /*
     22   8517       Eric  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789     ahrens  * Use is subject to license terms.
     24    789     ahrens  */
     25    789     ahrens 
     26    789     ahrens #include <sys/dmu_objset.h>
     27    789     ahrens #include <sys/dsl_dataset.h>
     28    789     ahrens #include <sys/dsl_dir.h>
     29   2082   eschrock #include <sys/dsl_prop.h>
     30   2199     ahrens #include <sys/dsl_synctask.h>
     31    789     ahrens #include <sys/dmu_traverse.h>
     32    789     ahrens #include <sys/dmu_tx.h>
     33    789     ahrens #include <sys/arc.h>
     34    789     ahrens #include <sys/zio.h>
     35    789     ahrens #include <sys/zap.h>
     36    789     ahrens #include <sys/unique.h>
     37    789     ahrens #include <sys/zfs_context.h>
     38   4007   mmusante #include <sys/zfs_ioctl.h>
     39   4543      marks #include <sys/spa.h>
     40   7046     ahrens #include <sys/zfs_znode.h>
     41  10242      chris #include <sys/zvol.h>
     42   1731    bonwick 
/*
 * Sentinel value stored in ds_owner; DSL_DATASET_IS_DESTROYED() below
 * compares a dataset's ds_owner against this pointer.
 */
     43   6689     maybee static char *dsl_reaper = "the grim reaper";
     44   6689     maybee 
/*
 * Forward declarations of sync-task check/sync callbacks.
 * NOTE(review): their definitions are outside the portion of the file
 * reviewed here.
 */
     45   2199     ahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
     46   2199     ahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
     47   5378   ck153898 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
     48    789     ahrens 
/* 2^62.  NOTE(review): users of this limit are not visible here. */
     49   3444   ek110237 #define	DS_REF_MAX	(1ULL << 62)
     50    789     ahrens 
/*
 * Alias for SPA_MAXBLOCKSIZE; presumably the block size used when
 * creating deadlist objects -- TODO confirm against its users.
 */
     51    789     ahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
     52    789     ahrens 
/* Evaluates to true when the dataset's owner is the dsl_reaper sentinel. */
     53   6689     maybee #define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
     54   6689     maybee 
     55    789     ahrens 
     56   5378   ck153898 /*
     57   5378   ck153898  * Figure out how much of this delta should be propogated to the dsl_dir
     58   5378   ck153898  * layer.  If there's a refreservation, that space has already been
     59   5378   ck153898  * partially accounted for in our ancestors.
     60   5378   ck153898  */
     61   5378   ck153898 static int64_t
     62   5378   ck153898 parent_delta(dsl_dataset_t *ds, int64_t delta)
     63   5378   ck153898 {
     64   5378   ck153898 	uint64_t old_bytes, new_bytes;
     65   5378   ck153898 
     66   5378   ck153898 	if (ds->ds_reserved == 0)
     67   5378   ck153898 		return (delta);
     68   5378   ck153898 
     69   5378   ck153898 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
     70   5378   ck153898 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
     71   5378   ck153898 
     72   5378   ck153898 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
     73   5378   ck153898 	return (new_bytes - old_bytes);
     74   5378   ck153898 }
     75    789     ahrens 
     76    789     ahrens void
     77  10922       Jeff dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
     78    789     ahrens {
     79  10922       Jeff 	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
     80    789     ahrens 	int compressed = BP_GET_PSIZE(bp);
     81    789     ahrens 	int uncompressed = BP_GET_UCSIZE(bp);
     82   5378   ck153898 	int64_t delta;
     83    789     ahrens 
     84    789     ahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
     85    789     ahrens 
     86    789     ahrens 	ASSERT(dmu_tx_is_syncing(tx));
     87    789     ahrens 	/* It could have been compressed away to nothing */
     88    789     ahrens 	if (BP_IS_HOLE(bp))
     89    789     ahrens 		return;
     90    789     ahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
     91    789     ahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
     92    789     ahrens 	if (ds == NULL) {
     93    789     ahrens 		/*
     94    789     ahrens 		 * Account for the meta-objset space in its placeholder
     95    789     ahrens 		 * dsl_dir.
     96    789     ahrens 		 */
     97    789     ahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
     98   7390    Matthew 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
     99    789     ahrens 		    used, compressed, uncompressed, tx);
    100    789     ahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
    101    789     ahrens 		return;
    102    789     ahrens 	}
    103    789     ahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
    104   7595    Matthew 	mutex_enter(&ds->ds_dir->dd_lock);
    105    789     ahrens 	mutex_enter(&ds->ds_lock);
    106   5378   ck153898 	delta = parent_delta(ds, used);
    107    789     ahrens 	ds->ds_phys->ds_used_bytes += used;
    108    789     ahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
    109    789     ahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
    110    789     ahrens 	ds->ds_phys->ds_unique_bytes += used;
    111    789     ahrens 	mutex_exit(&ds->ds_lock);
    112   7390    Matthew 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
    113   7390    Matthew 	    compressed, uncompressed, tx);
    114   7390    Matthew 	dsl_dir_transfer_space(ds->ds_dir, used - delta,
    115   7390    Matthew 	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
    116   7595    Matthew 	mutex_exit(&ds->ds_dir->dd_lock);
    117    789     ahrens }
    118    789     ahrens 
    119   6992     maybee int
    120  10922       Jeff dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    121  10922       Jeff     boolean_t async)
    122    789     ahrens {
    123  10922       Jeff 	if (BP_IS_HOLE(bp))
    124  10922       Jeff 		return (0);
    125  10922       Jeff 
    126  10922       Jeff 	ASSERT(dmu_tx_is_syncing(tx));
    127  10922       Jeff 	ASSERT(bp->blk_birth <= tx->tx_txg);
    128  10922       Jeff 
    129  10922       Jeff 	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
    130    789     ahrens 	int compressed = BP_GET_PSIZE(bp);
    131    789     ahrens 	int uncompressed = BP_GET_UCSIZE(bp);
    132    789     ahrens 
    133    789     ahrens 	ASSERT(used > 0);
    134    789     ahrens 	if (ds == NULL) {
    135    789     ahrens 		/*
    136    789     ahrens 		 * Account for the meta-objset space in its placeholder
    137    789     ahrens 		 * dataset.
    138    789     ahrens 		 */
    139  10922       Jeff 		dsl_free(tx->tx_pool, tx->tx_txg, bp);
    140    789     ahrens 
    141   7390    Matthew 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
    142    789     ahrens 		    -used, -compressed, -uncompressed, tx);
    143    789     ahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
    144   6992     maybee 		return (used);
    145    789     ahrens 	}
    146    789     ahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
    147    789     ahrens 
    148   7390    Matthew 	ASSERT(!dsl_dataset_is_snapshot(ds));
    149    789     ahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
    150    789     ahrens 
    151    789     ahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
    152   5378   ck153898 		int64_t delta;
    153   3547     maybee 
    154    789     ahrens 		dprintf_bp(bp, "freeing: %s", "");
    155  10922       Jeff 		dsl_free(tx->tx_pool, tx->tx_txg, bp);
    156    789     ahrens 
    157   7595    Matthew 		mutex_enter(&ds->ds_dir->dd_lock);
    158    789     ahrens 		mutex_enter(&ds->ds_lock);
    159   5378   ck153898 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
    160   5378   ck153898 		    !DS_UNIQUE_IS_ACCURATE(ds));
    161   5378   ck153898 		delta = parent_delta(ds, -used);
    162    789     ahrens 		ds->ds_phys->ds_unique_bytes -= used;
    163    789     ahrens 		mutex_exit(&ds->ds_lock);
    164   7390    Matthew 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
    165   5378   ck153898 		    delta, -compressed, -uncompressed, tx);
    166   7390    Matthew 		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
    167   7390    Matthew 		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
    168   7595    Matthew 		mutex_exit(&ds->ds_dir->dd_lock);
    169    789     ahrens 	} else {
    170    789     ahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
    171  10922       Jeff 		if (async) {
    172  10922       Jeff 			/*
    173  10922       Jeff 			 * We are here as part of zio's write done callback,
    174  10922       Jeff 			 * which means we're a zio interrupt thread.  We can't
    175  10922       Jeff 			 * call bplist_enqueue() now because it may block
    176  10922       Jeff 			 * waiting for I/O.  Instead, put bp on the deferred
    177  10922       Jeff 			 * queue and let dsl_pool_sync() finish the job.
    178  10922       Jeff 			 */
    179  10922       Jeff 			bplist_enqueue_deferred(&ds->ds_deadlist, bp);
    180  10922       Jeff 		} else {
    181  10922       Jeff 			VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
    182  10922       Jeff 		}
    183   5712     ahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
    184   5712     ahrens 		    ds->ds_phys->ds_prev_snap_obj);
    185   5712     ahrens 		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
    186    789     ahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
    187   5712     ahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
    188   5712     ahrens 		    ds->ds_object && bp->blk_birth >
    189   5712     ahrens 		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
    190   5712     ahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
    191   5712     ahrens 			mutex_enter(&ds->ds_prev->ds_lock);
    192   5712     ahrens 			ds->ds_prev->ds_phys->ds_unique_bytes += used;
    193   5712     ahrens 			mutex_exit(&ds->ds_prev->ds_lock);
    194   7390    Matthew 		}
    195   7390    Matthew 		if (bp->blk_birth > ds->ds_origin_txg) {
    196   7390    Matthew 			dsl_dir_transfer_space(ds->ds_dir, used,
    197   7390    Matthew 			    DD_USED_HEAD, DD_USED_SNAP, tx);
    198    789     ahrens 		}
    199    789     ahrens 	}
    200    789     ahrens 	mutex_enter(&ds->ds_lock);
    201    789     ahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
    202    789     ahrens 	ds->ds_phys->ds_used_bytes -= used;
    203    789     ahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
    204    789     ahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
    205    789     ahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
    206    789     ahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
    207    789     ahrens 	mutex_exit(&ds->ds_lock);
    208   6992     maybee 
    209   6992     maybee 	return (used);
    210    789     ahrens }
    211    789     ahrens 
    212   1544   eschrock uint64_t
    213   1544   eschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
    214    789     ahrens {
    215   2885     ahrens 	uint64_t trysnap = 0;
    216   2885     ahrens 
    217    789     ahrens 	if (ds == NULL)
    218   1544   eschrock 		return (0);
    219    789     ahrens 	/*
    220    789     ahrens 	 * The snapshot creation could fail, but that would cause an
    221    789     ahrens 	 * incorrect FALSE return, which would only result in an
    222    789     ahrens 	 * overestimation of the amount of space that an operation would
    223    789     ahrens 	 * consume, which is OK.
    224    789     ahrens 	 *
    225    789     ahrens 	 * There's also a small window where we could miss a pending
    226    789     ahrens 	 * snapshot, because we could set the sync task in the quiescing
    227    789     ahrens 	 * phase.  So this should only be used as a guess.
    228    789     ahrens 	 */
    229   2885     ahrens 	if (ds->ds_trysnap_txg >
    230   2885     ahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
    231   2885     ahrens 		trysnap = ds->ds_trysnap_txg;
    232   2885     ahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
    233   1544   eschrock }
    234   1544   eschrock 
    235   9653    Sanjeev boolean_t
    236   1544   eschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
    237   1544   eschrock {
    238   1544   eschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
    239    789     ahrens }
    240    789     ahrens 
    241    789     ahrens /* ARGSUSED */
    242    789     ahrens static void
    243    789     ahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
    244    789     ahrens {
    245    789     ahrens 	dsl_dataset_t *ds = dsv;
    246    789     ahrens 
    247   6689     maybee 	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
    248    789     ahrens 
    249   4787     ahrens 	unique_remove(ds->ds_fsid_guid);
    250    789     ahrens 
    251  10298    Matthew 	if (ds->ds_objset != NULL)
    252  10298    Matthew 		dmu_objset_evict(ds->ds_objset);
    253    789     ahrens 
    254    789     ahrens 	if (ds->ds_prev) {
    255   6689     maybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
    256    789     ahrens 		ds->ds_prev = NULL;
    257    789     ahrens 	}
    258    789     ahrens 
    259    789     ahrens 	bplist_close(&ds->ds_deadlist);
    260   6689     maybee 	if (ds->ds_dir)
    261   6689     maybee 		dsl_dir_close(ds->ds_dir, ds);
    262    789     ahrens 
    263   4787     ahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
    264    789     ahrens 
    265   2856   nd150628 	mutex_destroy(&ds->ds_lock);
    266  10204    Matthew 	mutex_destroy(&ds->ds_recvlock);
    267   4787     ahrens 	mutex_destroy(&ds->ds_opening_lock);
    268   6689     maybee 	rw_destroy(&ds->ds_rwlock);
    269   6689     maybee 	cv_destroy(&ds->ds_exclusive_cv);
    270  10922       Jeff 	bplist_fini(&ds->ds_deadlist);
    271   2856   nd150628 
    272    789     ahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
    273    789     ahrens }
    274    789     ahrens 
    275   1544   eschrock static int
    276    789     ahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
    277    789     ahrens {
    278    789     ahrens 	dsl_dataset_phys_t *headphys;
    279    789     ahrens 	int err;
    280    789     ahrens 	dmu_buf_t *headdbuf;
    281    789     ahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
    282    789     ahrens 	objset_t *mos = dp->dp_meta_objset;
    283    789     ahrens 
    284    789     ahrens 	if (ds->ds_snapname[0])
    285   1544   eschrock 		return (0);
    286    789     ahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
    287   1544   eschrock 		return (0);
    288    789     ahrens 
    289   1544   eschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
    290   1544   eschrock 	    FTAG, &headdbuf);
    291   1544   eschrock 	if (err)
    292   1544   eschrock 		return (err);
    293    789     ahrens 	headphys = headdbuf->db_data;
    294    789     ahrens 	err = zap_value_search(dp->dp_meta_objset,
    295   4577     ahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
    296   1544   eschrock 	dmu_buf_rele(headdbuf, FTAG);
    297   1544   eschrock 	return (err);
    298    789     ahrens }
    299    789     ahrens 
    300   6492       timh static int
    301   6689     maybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
    302   6492       timh {
    303   6689     maybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
    304   6689     maybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
    305   6492       timh 	matchtype_t mt;
    306   6492       timh 	int err;
    307   6492       timh 
    308   6689     maybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
    309   6492       timh 		mt = MT_FIRST;
    310   6492       timh 	else
    311   6492       timh 		mt = MT_EXACT;
    312   6492       timh 
    313   6689     maybee 	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
    314   6492       timh 	    value, mt, NULL, 0, NULL);
    315   6492       timh 	if (err == ENOTSUP && mt == MT_FIRST)
    316   6689     maybee 		err = zap_lookup(mos, snapobj, name, 8, 1, value);
    317   6492       timh 	return (err);
    318   6492       timh }
    319   6492       timh 
    320   6492       timh static int
    321   6689     maybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
    322   6492       timh {
    323   6689     maybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
    324   6689     maybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
    325   6492       timh 	matchtype_t mt;
    326   6492       timh 	int err;
    327  10373      chris 
    328  10373      chris 	dsl_dir_snap_cmtime_update(ds->ds_dir);
    329   6492       timh 
    330   6689     maybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
    331   6492       timh 		mt = MT_FIRST;
    332   6492       timh 	else
    333   6492       timh 		mt = MT_EXACT;
    334   6492       timh 
    335   6689     maybee 	err = zap_remove_norm(mos, snapobj, name, mt, tx);
    336   6492       timh 	if (err == ENOTSUP && mt == MT_FIRST)
    337   6689     maybee 		err = zap_remove(mos, snapobj, name, tx);
    338   6492       timh 	return (err);
    339   6492       timh }
    340   6492       timh 
    341   6689     maybee static int
    342   6689     maybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    343   6689     maybee     dsl_dataset_t **dsp)
    344    789     ahrens {
    345    789     ahrens 	objset_t *mos = dp->dp_meta_objset;
    346    789     ahrens 	dmu_buf_t *dbuf;
    347    789     ahrens 	dsl_dataset_t *ds;
    348   1544   eschrock 	int err;
    349    789     ahrens 
    350    789     ahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
    351    789     ahrens 	    dsl_pool_sync_context(dp));
    352    789     ahrens 
    353   1544   eschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
    354   1544   eschrock 	if (err)
    355   1544   eschrock 		return (err);
    356    789     ahrens 	ds = dmu_buf_get_user(dbuf);
    357    789     ahrens 	if (ds == NULL) {
    358    789     ahrens 		dsl_dataset_t *winner;
    359    789     ahrens 
    360    789     ahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
    361    789     ahrens 		ds->ds_dbuf = dbuf;
    362    789     ahrens 		ds->ds_object = dsobj;
    363    789     ahrens 		ds->ds_phys = dbuf->db_data;
    364    789     ahrens 
    365   2856   nd150628 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
    366  10204    Matthew 		mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
    367   4787     ahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
    368   6689     maybee 		rw_init(&ds->ds_rwlock, 0, 0, 0);
    369   6689     maybee 		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
    370  10922       Jeff 		bplist_init(&ds->ds_deadlist);
    371   2856   nd150628 
    372   1544   eschrock 		err = bplist_open(&ds->ds_deadlist,
    373    789     ahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
    374   1544   eschrock 		if (err == 0) {
    375   1544   eschrock 			err = dsl_dir_open_obj(dp,
    376   1544   eschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
    377   1544   eschrock 		}
    378   1544   eschrock 		if (err) {
    379   1544   eschrock 			/*
    380   1544   eschrock 			 * we don't really need to close the blist if we
    381   1544   eschrock 			 * just opened it.
    382   1544   eschrock 			 */
    383   2856   nd150628 			mutex_destroy(&ds->ds_lock);
    384  10204    Matthew 			mutex_destroy(&ds->ds_recvlock);
    385   4787     ahrens 			mutex_destroy(&ds->ds_opening_lock);
    386   6689     maybee 			rw_destroy(&ds->ds_rwlock);
    387   6689     maybee 			cv_destroy(&ds->ds_exclusive_cv);
    388  10922       Jeff 			bplist_fini(&ds->ds_deadlist);
    389   1544   eschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
    390   1544   eschrock 			dmu_buf_rele(dbuf, tag);
    391   1544   eschrock 			return (err);
    392   1544   eschrock 		}
    393    789     ahrens 
    394   7390    Matthew 		if (!dsl_dataset_is_snapshot(ds)) {
    395    789     ahrens 			ds->ds_snapname[0] = '\0';
    396    789     ahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
    397   6689     maybee 				err = dsl_dataset_get_ref(dp,
    398   6689     maybee 				    ds->ds_phys->ds_prev_snap_obj,
    399   6689     maybee 				    ds, &ds->ds_prev);
    400    789     ahrens 			}
    401   7390    Matthew 
    402   7390    Matthew 			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
    403   7390    Matthew 				dsl_dataset_t *origin;
    404   7390    Matthew 
    405   7390    Matthew 				err = dsl_dataset_hold_obj(dp,
    406   7390    Matthew 				    ds->ds_dir->dd_phys->dd_origin_obj,
    407   7390    Matthew 				    FTAG, &origin);
    408   7390    Matthew 				if (err == 0) {
    409   7390    Matthew 					ds->ds_origin_txg =
    410   7390    Matthew 					    origin->ds_phys->ds_creation_txg;
    411   7390    Matthew 					dsl_dataset_rele(origin, FTAG);
    412   7390    Matthew 				}
    413   7390    Matthew 			}
    414  10242      chris 		} else {
    415  10242      chris 			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
    416  10242      chris 				err = dsl_dataset_get_snapname(ds);
    417  10242      chris 			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
    418  10242      chris 				err = zap_count(
    419  10242      chris 				    ds->ds_dir->dd_pool->dp_meta_objset,
    420  10242      chris 				    ds->ds_phys->ds_userrefs_obj,
    421  10242      chris 				    &ds->ds_userrefs);
    422  10242      chris 			}
    423    789     ahrens 		}
    424    789     ahrens 
    425   7390    Matthew 		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
    426   5569   ck153898 			/*
    427   5569   ck153898 			 * In sync context, we're called with either no lock
    428   5569   ck153898 			 * or with the write lock.  If we're not syncing,
    429   5569   ck153898 			 * we're always called with the read lock held.
    430   5569   ck153898 			 */
    431   5475   ck153898 			boolean_t need_lock =
    432   5569   ck153898 			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
    433   5569   ck153898 			    dsl_pool_sync_context(dp);
    434   5475   ck153898 
    435   5475   ck153898 			if (need_lock)
    436   5475   ck153898 				rw_enter(&dp->dp_config_rwlock, RW_READER);
    437   5475   ck153898 
    438   7265     ahrens 			err = dsl_prop_get_ds(ds,
    439   5475   ck153898 			    "refreservation", sizeof (uint64_t), 1,
    440   5475   ck153898 			    &ds->ds_reserved, NULL);
    441   5475   ck153898 			if (err == 0) {
    442   7265     ahrens 				err = dsl_prop_get_ds(ds,
    443   5475   ck153898 				    "refquota", sizeof (uint64_t), 1,
    444   5475   ck153898 				    &ds->ds_quota, NULL);
    445   5475   ck153898 			}
    446   5475   ck153898 
    447   5475   ck153898 			if (need_lock)
    448   5475   ck153898 				rw_exit(&dp->dp_config_rwlock);
    449   5475   ck153898 		} else {
    450   5475   ck153898 			ds->ds_reserved = ds->ds_quota = 0;
    451   5475   ck153898 		}
    452   5475   ck153898 
    453   1544   eschrock 		if (err == 0) {
    454   1544   eschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
    455   1544   eschrock 			    dsl_dataset_evict);
    456   1544   eschrock 		}
    457   1544   eschrock 		if (err || winner) {
    458    789     ahrens 			bplist_close(&ds->ds_deadlist);
    459   6689     maybee 			if (ds->ds_prev)
    460   6689     maybee 				dsl_dataset_drop_ref(ds->ds_prev, ds);
    461    789     ahrens 			dsl_dir_close(ds->ds_dir, ds);
    462   2856   nd150628 			mutex_destroy(&ds->ds_lock);
    463  10204    Matthew 			mutex_destroy(&ds->ds_recvlock);
    464   4787     ahrens 			mutex_destroy(&ds->ds_opening_lock);
    465   6689     maybee 			rw_destroy(&ds->ds_rwlock);
    466   6689     maybee 			cv_destroy(&ds->ds_exclusive_cv);
    467  10922       Jeff 			bplist_fini(&ds->ds_deadlist);
    468    789     ahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
    469   1544   eschrock 			if (err) {
    470   1544   eschrock 				dmu_buf_rele(dbuf, tag);
    471   1544   eschrock 				return (err);
    472   1544   eschrock 			}
    473    789     ahrens 			ds = winner;
    474    789     ahrens 		} else {
    475   4787     ahrens 			ds->ds_fsid_guid =
    476    789     ahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
    477   5378   ck153898 		}
    478    789     ahrens 	}
    479    789     ahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
    480    789     ahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
    481   7046     ahrens 	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
    482   7061     ahrens 	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
    483   7077     ahrens 	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
    484    789     ahrens 	mutex_enter(&ds->ds_lock);
    485   6689     maybee 	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
    486    789     ahrens 		mutex_exit(&ds->ds_lock);
    487   6689     maybee 		dmu_buf_rele(ds->ds_dbuf, tag);
    488   6689     maybee 		return (ENOENT);
    489    789     ahrens 	}
    490    789     ahrens 	mutex_exit(&ds->ds_lock);
    491   1544   eschrock 	*dsp = ds;
    492   1544   eschrock 	return (0);
    493    789     ahrens }
    494    789     ahrens 
    495   6689     maybee static int
    496   6689     maybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
    497   6689     maybee {
    498   6689     maybee 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
    499   6689     maybee 
    500   6689     maybee 	/*
    501   6689     maybee 	 * In syncing context we don't want the rwlock lock: there
    502   6689     maybee 	 * may be an existing writer waiting for sync phase to
    503   6689     maybee 	 * finish.  We don't need to worry about such writers, since
    504   6689     maybee 	 * sync phase is single-threaded, so the writer can't be
    505   6689     maybee 	 * doing anything while we are active.
    506   6689     maybee 	 */
    507   6689     maybee 	if (dsl_pool_sync_context(dp)) {
    508   6689     maybee 		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
    509   6689     maybee 		return (0);
    510   6689     maybee 	}
    511   6689     maybee 
    512   6689     maybee 	/*
    513   6689     maybee 	 * Normal users will hold the ds_rwlock as a READER until they
    514   6689     maybee 	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
    515   6689     maybee 	 * drop their READER lock after they set the ds_owner field.
    516   6689     maybee 	 *
    517   6689     maybee 	 * If the dataset is being destroyed, the destroy thread will
    518   6689     maybee 	 * obtain a WRITER lock for exclusive access after it's done its
    519   6689     maybee 	 * open-context work and then change the ds_owner to
    520   6689     maybee 	 * dsl_reaper once destruction is assured.  So threads
    521   6689     maybee 	 * may block here temporarily, until the "destructability" of
    522   6689     maybee 	 * the dataset is determined.
    523   6689     maybee 	 */
    524   6689     maybee 	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
    525   6689     maybee 	mutex_enter(&ds->ds_lock);
    526   6689     maybee 	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
    527   6689     maybee 		rw_exit(&dp->dp_config_rwlock);
    528   6689     maybee 		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
    529   6689     maybee 		if (DSL_DATASET_IS_DESTROYED(ds)) {
    530   6689     maybee 			mutex_exit(&ds->ds_lock);
    531   6689     maybee 			dsl_dataset_drop_ref(ds, tag);
    532   6689     maybee 			rw_enter(&dp->dp_config_rwlock, RW_READER);
    533   6689     maybee 			return (ENOENT);
    534   6689     maybee 		}
    535   6689     maybee 		rw_enter(&dp->dp_config_rwlock, RW_READER);
    536   6689     maybee 	}
    537   6689     maybee 	mutex_exit(&ds->ds_lock);
    538   6689     maybee 	return (0);
    539   6689     maybee }
    540   6689     maybee 
    541    789     ahrens int
    542   6689     maybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    543   6689     maybee     dsl_dataset_t **dsp)
    544   6689     maybee {
    545   6689     maybee 	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
    546   6689     maybee 
    547   6689     maybee 	if (err)
    548   6689     maybee 		return (err);
    549   6689     maybee 	return (dsl_dataset_hold_ref(*dsp, tag));
    550   6689     maybee }
    551   6689     maybee 
    552   6689     maybee int
    553  10298    Matthew dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
    554  10298    Matthew     void *tag, dsl_dataset_t **dsp)
    555   6689     maybee {
    556  10298    Matthew 	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
    557   6689     maybee 	if (err)
    558   6689     maybee 		return (err);
    559  10298    Matthew 	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
    560  10298    Matthew 		dsl_dataset_rele(*dsp, tag);
    561   8779       Mark 		*dsp = NULL;
    562   6689     maybee 		return (EBUSY);
    563   6689     maybee 	}
    564   6689     maybee 	return (0);
    565   6689     maybee }
    566   6689     maybee 
    567   6689     maybee int
    568   6689     maybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
    569    789     ahrens {
    570    789     ahrens 	dsl_dir_t *dd;
    571    789     ahrens 	dsl_pool_t *dp;
    572   6689     maybee 	const char *snapname;
    573    789     ahrens 	uint64_t obj;
    574    789     ahrens 	int err = 0;
    575    789     ahrens 
    576   6689     maybee 	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
    577   1544   eschrock 	if (err)
    578   1544   eschrock 		return (err);
    579    789     ahrens 
    580    789     ahrens 	dp = dd->dd_pool;
    581    789     ahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
    582    789     ahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
    583   6689     maybee 	if (obj)
    584   6689     maybee 		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
    585   6689     maybee 	else
    586    789     ahrens 		err = ENOENT;
    587   6689     maybee 	if (err)
    588    789     ahrens 		goto out;
    589    789     ahrens 
    590   6689     maybee 	err = dsl_dataset_hold_ref(*dsp, tag);
    591    789     ahrens 
    592   6689     maybee 	/* we may be looking for a snapshot */
    593   6689     maybee 	if (err == 0 && snapname != NULL) {
    594   6689     maybee 		dsl_dataset_t *ds = NULL;
    595    789     ahrens 
    596   6689     maybee 		if (*snapname++ != '@') {
    597   6689     maybee 			dsl_dataset_rele(*dsp, tag);
    598    789     ahrens 			err = ENOENT;
    599    789     ahrens 			goto out;
    600    789     ahrens 		}
    601    789     ahrens 
    602   6689     maybee 		dprintf("looking for snapshot '%s'\n", snapname);
    603   6689     maybee 		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
    604   6689     maybee 		if (err == 0)
    605   6689     maybee 			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
    606   6689     maybee 		dsl_dataset_rele(*dsp, tag);
    607   6689     maybee 
    608   6689     maybee 		ASSERT3U((err == 0), ==, (ds != NULL));
    609   6689     maybee 
    610   6689     maybee 		if (ds) {
    611   6689     maybee 			mutex_enter(&ds->ds_lock);
    612   6689     maybee 			if (ds->ds_snapname[0] == 0)
    613   6689     maybee 				(void) strlcpy(ds->ds_snapname, snapname,
    614   6689     maybee 				    sizeof (ds->ds_snapname));
    615   6689     maybee 			mutex_exit(&ds->ds_lock);
    616   6689     maybee 			err = dsl_dataset_hold_ref(ds, tag);
    617   6689     maybee 			*dsp = err ? NULL : ds;
    618    789     ahrens 		}
    619    789     ahrens 	}
    620    789     ahrens out:
    621    789     ahrens 	rw_exit(&dp->dp_config_rwlock);
    622    789     ahrens 	dsl_dir_close(dd, FTAG);
    623    789     ahrens 	return (err);
    624    789     ahrens }
    625    789     ahrens 
    626    789     ahrens int
    627  10298    Matthew dsl_dataset_own(const char *name, boolean_t inconsistentok,
    628  10298    Matthew     void *tag, dsl_dataset_t **dsp)
    629    789     ahrens {
    630  10298    Matthew 	int err = dsl_dataset_hold(name, tag, dsp);
    631   6689     maybee 	if (err)
    632   6689     maybee 		return (err);
    633  10298    Matthew 	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
    634  10298    Matthew 		dsl_dataset_rele(*dsp, tag);
    635   6689     maybee 		return (EBUSY);
    636   6689     maybee 	}
    637   6689     maybee 	return (0);
    638    789     ahrens }
    639    789     ahrens 
    640    789     ahrens void
    641    789     ahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
    642    789     ahrens {
    643    789     ahrens 	if (ds == NULL) {
    644    789     ahrens 		(void) strcpy(name, "mos");
    645    789     ahrens 	} else {
    646    789     ahrens 		dsl_dir_name(ds->ds_dir, name);
    647   1544   eschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
    648    789     ahrens 		if (ds->ds_snapname[0]) {
    649    789     ahrens 			(void) strcat(name, "@");
    650   6689     maybee 			/*
    651   6689     maybee 			 * We use a "recursive" mutex so that we
    652   6689     maybee 			 * can call dprintf_ds() with ds_lock held.
    653   6689     maybee 			 */
    654    789     ahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
    655    789     ahrens 				mutex_enter(&ds->ds_lock);
    656    789     ahrens 				(void) strcat(name, ds->ds_snapname);
    657    789     ahrens 				mutex_exit(&ds->ds_lock);
    658    789     ahrens 			} else {
    659    789     ahrens 				(void) strcat(name, ds->ds_snapname);
    660    789     ahrens 			}
    661    789     ahrens 		}
    662    789     ahrens 	}
    663   3978   mmusante }
    664   3978   mmusante 
    665   3978   mmusante static int
    666   3978   mmusante dsl_dataset_namelen(dsl_dataset_t *ds)
    667   3978   mmusante {
    668   3978   mmusante 	int result;
    669   3978   mmusante 
    670   3978   mmusante 	if (ds == NULL) {
    671   3978   mmusante 		result = 3;	/* "mos" */
    672   3978   mmusante 	} else {
    673   3978   mmusante 		result = dsl_dir_namelen(ds->ds_dir);
    674   3978   mmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
    675   3978   mmusante 		if (ds->ds_snapname[0]) {
    676   3978   mmusante 			++result;	/* adding one for the @-sign */
    677   3978   mmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
    678   3978   mmusante 				mutex_enter(&ds->ds_lock);
    679   3978   mmusante 				result += strlen(ds->ds_snapname);
    680   3978   mmusante 				mutex_exit(&ds->ds_lock);
    681   3978   mmusante 			} else {
    682   3978   mmusante 				result += strlen(ds->ds_snapname);
    683   3978   mmusante 			}
    684   3978   mmusante 		}
    685   3978   mmusante 	}
    686   3978   mmusante 
    687   3978   mmusante 	return (result);
    688    789     ahrens }
    689    789     ahrens 
    690   7046     ahrens void
    691   6689     maybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
    692    789     ahrens {
    693   1544   eschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
    694    789     ahrens }
    695    789     ahrens 
    696    789     ahrens void
    697   6689     maybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
    698   5367     ahrens {
    699   6689     maybee 	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
    700   6689     maybee 		rw_exit(&ds->ds_rwlock);
    701   6689     maybee 	}
    702   6689     maybee 	dsl_dataset_drop_ref(ds, tag);
    703   6689     maybee }
    704   6689     maybee 
    705   6689     maybee void
    706  10298    Matthew dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
    707   6689     maybee {
    708  10298    Matthew 	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
    709   6689     maybee 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
    710   6689     maybee 
    711   5367     ahrens 	mutex_enter(&ds->ds_lock);
    712   6689     maybee 	ds->ds_owner = NULL;
    713   6689     maybee 	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
    714   6689     maybee 		rw_exit(&ds->ds_rwlock);
    715   6689     maybee 		cv_broadcast(&ds->ds_exclusive_cv);
    716   6689     maybee 	}
    717   5367     ahrens 	mutex_exit(&ds->ds_lock);
    718   6689     maybee 	if (ds->ds_dbuf)
    719  10298    Matthew 		dsl_dataset_drop_ref(ds, tag);
    720   6689     maybee 	else
    721   6689     maybee 		dsl_dataset_evict(ds->ds_dbuf, ds);
    722   5367     ahrens }
    723   5367     ahrens 
    724   5367     ahrens boolean_t
    725  10298    Matthew dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
    726   5367     ahrens {
    727   6689     maybee 	boolean_t gotit = FALSE;
    728   6689     maybee 
    729   5367     ahrens 	mutex_enter(&ds->ds_lock);
    730   6689     maybee 	if (ds->ds_owner == NULL &&
    731   6689     maybee 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
    732  10298    Matthew 		ds->ds_owner = tag;
    733   6689     maybee 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
    734   6689     maybee 			rw_exit(&ds->ds_rwlock);
    735   6689     maybee 		gotit = TRUE;
    736   5367     ahrens 	}
    737   5367     ahrens 	mutex_exit(&ds->ds_lock);
    738   6689     maybee 	return (gotit);
    739   6689     maybee }
    740   6689     maybee 
    741   6689     maybee void
    742   6689     maybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
    743   6689     maybee {
    744   6689     maybee 	ASSERT3P(owner, ==, ds->ds_owner);
    745   6689     maybee 	if (!RW_WRITE_HELD(&ds->ds_rwlock))
    746   6689     maybee 		rw_enter(&ds->ds_rwlock, RW_WRITER);
    747   5367     ahrens }
    748   5367     ahrens 
    749   2199     ahrens uint64_t
    750   7046     ahrens dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    751   6492       timh     uint64_t flags, dmu_tx_t *tx)
    752    789     ahrens {
    753   5367     ahrens 	dsl_pool_t *dp = dd->dd_pool;
    754    789     ahrens 	dmu_buf_t *dbuf;
    755    789     ahrens 	dsl_dataset_phys_t *dsphys;
    756   5367     ahrens 	uint64_t dsobj;
    757    789     ahrens 	objset_t *mos = dp->dp_meta_objset;
    758   7046     ahrens 
    759   7046     ahrens 	if (origin == NULL)
    760   7046     ahrens 		origin = dp->dp_origin_snap;
    761    789     ahrens 
    762   5367     ahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
    763   5367     ahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
    764    789     ahrens 	ASSERT(dmu_tx_is_syncing(tx));
    765   5367     ahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
    766    789     ahrens 
    767    928     tabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
    768    928     tabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
    769   1544   eschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
    770    789     ahrens 	dmu_buf_will_dirty(dbuf, tx);
    771    789     ahrens 	dsphys = dbuf->db_data;
    772   6689     maybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
    773    789     ahrens 	dsphys->ds_dir_obj = dd->dd_object;
    774   6492       timh 	dsphys->ds_flags = flags;
    775    789     ahrens 	dsphys->ds_fsid_guid = unique_create();
    776    789     ahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
    777    789     ahrens 	    sizeof (dsphys->ds_guid));
    778    789     ahrens 	dsphys->ds_snapnames_zapobj =
    779   6492       timh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
    780   6492       timh 	    DMU_OT_NONE, 0, tx);
    781    789     ahrens 	dsphys->ds_creation_time = gethrestime_sec();
    782   7046     ahrens 	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
    783    789     ahrens 	dsphys->ds_deadlist_obj =
    784    789     ahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
    785   5378   ck153898 
    786   5367     ahrens 	if (origin) {
    787   5367     ahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
    788    789     ahrens 		dsphys->ds_prev_snap_txg =
    789   5367     ahrens 		    origin->ds_phys->ds_creation_txg;
    790    789     ahrens 		dsphys->ds_used_bytes =
    791   5367     ahrens 		    origin->ds_phys->ds_used_bytes;
    792    789     ahrens 		dsphys->ds_compressed_bytes =
    793   5367     ahrens 		    origin->ds_phys->ds_compressed_bytes;
    794    789     ahrens 		dsphys->ds_uncompressed_bytes =
    795   5367     ahrens 		    origin->ds_phys->ds_uncompressed_bytes;
    796   5367     ahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
    797   6502       timh 		dsphys->ds_flags |= origin->ds_phys->ds_flags;
    798    789     ahrens 
    799   5367     ahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
    800   5367     ahrens 		origin->ds_phys->ds_num_children++;
    801   7046     ahrens 
    802   7046     ahrens 		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
    803   7046     ahrens 			if (origin->ds_phys->ds_next_clones_obj == 0) {
    804   7046     ahrens 				origin->ds_phys->ds_next_clones_obj =
    805   7046     ahrens 				    zap_create(mos,
    806   7046     ahrens 				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
    807   7046     ahrens 			}
    808   7046     ahrens 			VERIFY(0 == zap_add_int(mos,
    809   7046     ahrens 			    origin->ds_phys->ds_next_clones_obj,
    810   7046     ahrens 			    dsobj, tx));
    811   7046     ahrens 		}
    812    789     ahrens 
    813    789     ahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
    814   5367     ahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
    815    789     ahrens 	}
    816   6492       timh 
    817   6492       timh 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
    818   6492       timh 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
    819   6492       timh 
    820   1544   eschrock 	dmu_buf_rele(dbuf, FTAG);
    821    789     ahrens 
    822    789     ahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
    823    789     ahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
    824   5367     ahrens 
    825   5367     ahrens 	return (dsobj);
    826   5367     ahrens }
    827   5367     ahrens 
    828   5367     ahrens uint64_t
    829   6492       timh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    830   6492       timh     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
    831   5367     ahrens {
    832   5367     ahrens 	dsl_pool_t *dp = pdd->dd_pool;
    833   5367     ahrens 	uint64_t dsobj, ddobj;
    834   5367     ahrens 	dsl_dir_t *dd;
    835   5367     ahrens 
    836   5367     ahrens 	ASSERT(lastname[0] != '@');
    837   5367     ahrens 
    838   7046     ahrens 	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
    839   5367     ahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
    840   5367     ahrens 
    841   7046     ahrens 	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
    842   5367     ahrens 
    843   5367     ahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
    844   5367     ahrens 
    845    789     ahrens 	dsl_dir_close(dd, FTAG);
    846    789     ahrens 
    847   2199     ahrens 	return (dsobj);
    848   2199     ahrens }
    849   2199     ahrens 
    850   2199     ahrens struct destroyarg {
    851   2199     ahrens 	dsl_sync_task_group_t *dstg;
    852   2199     ahrens 	char *snapname;
    853   2199     ahrens 	char *failed;
    854  10242      chris 	boolean_t defer;
    855   2199     ahrens };
    856   2199     ahrens 
    857   2199     ahrens static int
    858   2199     ahrens dsl_snapshot_destroy_one(char *name, void *arg)
    859   2199     ahrens {
    860   2199     ahrens 	struct destroyarg *da = arg;
    861   2199     ahrens 	dsl_dataset_t *ds;
    862   2199     ahrens 	int err;
    863  10242      chris 	char *dsname;
    864   2199     ahrens 
    865  10272    Matthew 	dsname = kmem_asprintf("%s@%s", name, da->snapname);
    866  10298    Matthew 	err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
    867  10272    Matthew 	strfree(dsname);
    868   6689     maybee 	if (err == 0) {
    869  10242      chris 		struct dsl_ds_destroyarg *dsda;
    870  10242      chris 
    871   6689     maybee 		dsl_dataset_make_exclusive(ds, da->dstg);
    872  10298    Matthew 		if (ds->ds_objset != NULL) {
    873  10298    Matthew 			dmu_objset_evict(ds->ds_objset);
    874  10298    Matthew 			ds->ds_objset = NULL;
    875   7237   ek110237 		}
    876  10242      chris 		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
    877  10242      chris 		dsda->ds = ds;
    878  10242      chris 		dsda->defer = da->defer;
    879   6689     maybee 		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
    880  10242      chris 		    dsl_dataset_destroy_sync, dsda, da->dstg, 0);
    881   6689     maybee 	} else if (err == ENOENT) {
    882   6689     maybee 		err = 0;
    883   6689     maybee 	} else {
    884   2199     ahrens 		(void) strcpy(da->failed, name);
    885   2199     ahrens 	}
    886   6689     maybee 	return (err);
    887   2199     ahrens }
    888   2199     ahrens 
    889   2199     ahrens /*
    890   2199     ahrens  * Destroy 'snapname' in all descendants of 'fsname'.
    891   2199     ahrens  */
    892   2199     ahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
    893   2199     ahrens int
    894  10242      chris dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
    895   2199     ahrens {
    896   2199     ahrens 	int err;
    897   2199     ahrens 	struct destroyarg da;
    898   2199     ahrens 	dsl_sync_task_t *dst;
    899   2199     ahrens 	spa_t *spa;
    900   2199     ahrens 
    901   4603     ahrens 	err = spa_open(fsname, &spa, FTAG);
    902   2199     ahrens 	if (err)
    903   2199     ahrens 		return (err);
    904   2199     ahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
    905   2199     ahrens 	da.snapname = snapname;
    906   2199     ahrens 	da.failed = fsname;
    907  10242      chris 	da.defer = defer;
    908   2199     ahrens 
    909   2199     ahrens 	err = dmu_objset_find(fsname,
    910   2417     ahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
    911   2199     ahrens 
    912   2199     ahrens 	if (err == 0)
    913   2199     ahrens 		err = dsl_sync_task_group_wait(da.dstg);
    914   2199     ahrens 
    915   2199     ahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
    916   2199     ahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
    917  10242      chris 		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
    918  10242      chris 		dsl_dataset_t *ds = dsda->ds;
    919  10242      chris 
    920   6689     maybee 		/*
    921   6689     maybee 		 * Return the file system name that triggered the error
    922   6689     maybee 		 */
    923   2199     ahrens 		if (dst->dst_err) {
    924   2199     ahrens 			dsl_dataset_name(ds, fsname);
    925   4603     ahrens 			*strchr(fsname, '@') = '\0';
    926   2199     ahrens 		}
    927  10242      chris 		ASSERT3P(dsda->rm_origin, ==, NULL);
    928   6689     maybee 		dsl_dataset_disown(ds, da.dstg);
    929  10242      chris 		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
    930   2199     ahrens 	}
    931   2199     ahrens 
    932   2199     ahrens 	dsl_sync_task_group_destroy(da.dstg);
    933   2199     ahrens 	spa_close(spa, FTAG);
    934   2199     ahrens 	return (err);
    935    789     ahrens }
    936    789     ahrens 
    937  10242      chris static boolean_t
    938  10242      chris dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
    939  10242      chris {
    940  10242      chris 	boolean_t might_destroy = B_FALSE;
    941  10242      chris 
    942  10242      chris 	mutex_enter(&ds->ds_lock);
    943  10242      chris 	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
    944  10242      chris 	    DS_IS_DEFER_DESTROY(ds))
    945  10242      chris 		might_destroy = B_TRUE;
    946  10242      chris 	mutex_exit(&ds->ds_lock);
    947  10242      chris 
    948  10242      chris 	return (might_destroy);
    949  10242      chris }
    950  10242      chris 
    951  10242      chris /*
    952  10242      chris  * If we're removing a clone, and these three conditions are true:
    953  10242      chris  *	1) the clone's origin has no other children
    954  10242      chris  *	2) the clone's origin has no user references
    955  10242      chris  *	3) the clone's origin has been marked for deferred destruction
    956  10242      chris  * Then, prepare to remove the origin as part of this sync task group.
    957  10242      chris  */
    958  10242      chris static int
    959  10242      chris dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
    960  10242      chris {
    961  10242      chris 	dsl_dataset_t *ds = dsda->ds;
    962  10242      chris 	dsl_dataset_t *origin = ds->ds_prev;
    963  10242      chris 
    964  10242      chris 	if (dsl_dataset_might_destroy_origin(origin)) {
    965  10242      chris 		char *name;
    966  10242      chris 		int namelen;
    967  10242      chris 		int error;
    968  10242      chris 
    969  10242      chris 		namelen = dsl_dataset_namelen(origin) + 1;
    970  10242      chris 		name = kmem_alloc(namelen, KM_SLEEP);
    971  10242      chris 		dsl_dataset_name(origin, name);
    972  10242      chris #ifdef _KERNEL
    973  10242      chris 		error = zfs_unmount_snap(name, NULL);
    974  10242      chris 		if (error) {
    975  10242      chris 			kmem_free(name, namelen);
    976  10242      chris 			return (error);
    977  10242      chris 		}
    978  10242      chris #endif
    979  10298    Matthew 		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
    980  10242      chris 		kmem_free(name, namelen);
    981  10242      chris 		if (error)
    982  10242      chris 			return (error);
    983  10242      chris 		dsda->rm_origin = origin;
    984  10242      chris 		dsl_dataset_make_exclusive(origin, tag);
    985  10342      chris 
    986  10342      chris 		if (origin->ds_objset != NULL) {
    987  10342      chris 			dmu_objset_evict(origin->ds_objset);
    988  10342      chris 			origin->ds_objset = NULL;
    989  10342      chris 		}
    990  10242      chris 	}
    991  10242      chris 
    992  10242      chris 	return (0);
    993  10242      chris }
    994  10242      chris 
    995   5367     ahrens /*
    996   6689     maybee  * ds must be opened as OWNER.  On return (whether successful or not),
    997   6689     maybee  * ds will be closed and caller can no longer dereference it.
    998   5367     ahrens  */
    999    789     ahrens int
   1000  10242      chris dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
   1001    789     ahrens {
   1002    789     ahrens 	int err;
   1003   2199     ahrens 	dsl_sync_task_group_t *dstg;
   1004   2199     ahrens 	objset_t *os;
   1005    789     ahrens 	dsl_dir_t *dd;
   1006   2199     ahrens 	uint64_t obj;
   1007  11022        Tom 	struct dsl_ds_destroyarg dsda = { 0 };
   1008  11022        Tom 	dsl_dataset_t dummy_ds = { 0 };
   1009  10242      chris 
   1010  10242      chris 	dsda.ds = ds;
   1011    789     ahrens 
   1012   5367     ahrens 	if (dsl_dataset_is_snapshot(ds)) {
   1013   2199     ahrens 		/* Destroying a snapshot is simpler */
   1014   6689     maybee 		dsl_dataset_make_exclusive(ds, tag);
   1015   7237   ek110237 
   1016  10298    Matthew 		if (ds->ds_objset != NULL) {
   1017  10298    Matthew 			dmu_objset_evict(ds->ds_objset);
   1018  10298    Matthew 			ds->ds_objset = NULL;
   1019   7237   ek110237 		}
   1020  10242      chris 		dsda.defer = defer;
   1021   2199     ahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
   1022   2199     ahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
   1023  10242      chris 		    &dsda, tag, 0);
   1024  10242      chris 		ASSERT3P(dsda.rm_origin, ==, NULL);
   1025  10385      chris 		goto out;
   1026  10385      chris 	} else if (defer) {
   1027  10385      chris 		err = EINVAL;
   1028   5367     ahrens 		goto out;
   1029   2199     ahrens 	}
   1030   2199     ahrens 
   1031   2199     ahrens 	dd = ds->ds_dir;
   1032  11022        Tom 	dummy_ds.ds_dir = dd;
   1033  11022        Tom 	dummy_ds.ds_object = ds->ds_object;
   1034   2199     ahrens 
   1035   2199     ahrens 	/*
   1036   2199     ahrens 	 * Check for errors and mark this ds as inconsistent, in
   1037   2199     ahrens 	 * case we crash while freeing the objects.
   1038   2199     ahrens 	 */
   1039   2199     ahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
   1040   2199     ahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
   1041   5367     ahrens 	if (err)
   1042   5367     ahrens 		goto out;
   1043   5367     ahrens 
   1044  10298    Matthew 	err = dmu_objset_from_ds(ds, &os);
   1045   5367     ahrens 	if (err)
   1046   5367     ahrens 		goto out;
   1047   2199     ahrens 
   1048   2199     ahrens 	/*
   1049   2199     ahrens 	 * remove the objects in open context, so that we won't
   1050   2199     ahrens 	 * have too much to do in syncing context.
   1051   2199     ahrens 	 */
   1052   3025     ahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
   1053   3025     ahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
   1054   6992     maybee 		/*
   1055   6992     maybee 		 * Ignore errors, if there is not enough disk space
   1056   6992     maybee 		 * we will deal with it in dsl_dataset_destroy_sync().
   1057   6992     maybee 		 */
   1058   6992     maybee 		(void) dmu_free_object(os, obj);
   1059   2199     ahrens 	}
   1060   2199     ahrens 
   1061   9396    Matthew 	/*
   1062   9396    Matthew 	 * We need to sync out all in-flight IO before we try to evict
   1063   9396    Matthew 	 * (the dataset evict func is trying to clear the cached entries
   1064   9396    Matthew 	 * for this dataset in the ARC).
   1065   9396    Matthew 	 */
   1066   9396    Matthew 	txg_wait_synced(dd->dd_pool, 0);
   1067   9396    Matthew 
   1068   9396    Matthew 	/*
   1069   9396    Matthew 	 * If we managed to free all the objects in open
   1070   9396    Matthew 	 * context, the user space accounting should be zero.
   1071   9396    Matthew 	 */
   1072   9396    Matthew 	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
   1073  10298    Matthew 	    dmu_objset_userused_enabled(os)) {
   1074   9396    Matthew 		uint64_t count;
   1075   9396    Matthew 
   1076   9396    Matthew 		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
   1077   9396    Matthew 		    count == 0);
   1078   9396    Matthew 		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
   1079   9396    Matthew 		    count == 0);
   1080   9396    Matthew 	}
   1081   9396    Matthew 
   1082   2199     ahrens 	if (err != ESRCH)
   1083   5367     ahrens 		goto out;
   1084   2199     ahrens 
   1085   6975     maybee 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
   1086   6975     maybee 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
   1087   6975     maybee 	rw_exit(&dd->dd_pool->dp_config_rwlock);
   1088   6975     maybee 
   1089   6975     maybee 	if (err)
   1090   6975     maybee 		goto out;
   1091   6975     maybee 
   1092  10298    Matthew 	if (ds->ds_objset) {
   1093   6689     maybee 		/*
   1094   6689     maybee 		 * We need to sync out all in-flight IO before we try
   1095   6689     maybee 		 * to evict (the dataset evict func is trying to clear
   1096   6689     maybee 		 * the cached entries for this dataset in the ARC).
   1097   6689     maybee 		 */
   1098   6689     maybee 		txg_wait_synced(dd->dd_pool, 0);
   1099   5367     ahrens 	}
   1100    789     ahrens 
   1101   2199     ahrens 	/*
   1102   2199     ahrens 	 * Blow away the dsl_dir + head dataset.
   1103   2199     ahrens 	 */
   1104   6689     maybee 	dsl_dataset_make_exclusive(ds, tag);
   1105  10298    Matthew 	if (ds->ds_objset) {
   1106  10298    Matthew 		dmu_objset_evict(ds->ds_objset);
   1107  10298    Matthew 		ds->ds_objset = NULL;
   1108   6975     maybee 	}
   1109  10242      chris 
   1110  10242      chris 	/*
   1111  10242      chris 	 * If we're removing a clone, we might also need to remove its
   1112  10242      chris 	 * origin.
   1113  10242      chris 	 */
   1114  10242      chris 	do {
   1115  10242      chris 		dsda.need_prep = B_FALSE;
   1116  10242      chris 		if (dsl_dir_is_clone(dd)) {
   1117  10242      chris 			err = dsl_dataset_origin_rm_prep(&dsda, tag);
   1118  10242      chris 			if (err) {
   1119  10242      chris 				dsl_dir_close(dd, FTAG);
   1120  10242      chris 				goto out;
   1121  10242      chris 			}
   1122  10242      chris 		}
   1123  10242      chris 
   1124  10242      chris 		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
   1125  10242      chris 		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
   1126  10242      chris 		    dsl_dataset_destroy_sync, &dsda, tag, 0);
   1127  10242      chris 		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
   1128  11022        Tom 		    dsl_dir_destroy_sync, &dummy_ds, FTAG, 0);
   1129  10242      chris 		err = dsl_sync_task_group_wait(dstg);
   1130  10242      chris 		dsl_sync_task_group_destroy(dstg);
   1131  10242      chris 
   1132  10242      chris 		/*
   1133  10242      chris 		 * We could be racing against 'zfs release' or 'zfs destroy -d'
   1134  10242      chris 		 * on the origin snap, in which case we can get EBUSY if we
   1135  10242      chris 		 * needed to destroy the origin snap but were not ready to
   1136  10242      chris 		 * do so.
   1137  10242      chris 		 */
   1138  10242      chris 		if (dsda.need_prep) {
   1139  10242      chris 			ASSERT(err == EBUSY);
   1140  10242      chris 			ASSERT(dsl_dir_is_clone(dd));
   1141  10242      chris 			ASSERT(dsda.rm_origin == NULL);
   1142  10242      chris 		}
   1143  10242      chris 	} while (dsda.need_prep);
   1144  10242      chris 
   1145  10242      chris 	if (dsda.rm_origin != NULL)
   1146  10242      chris 		dsl_dataset_disown(dsda.rm_origin, tag);
   1147  10242      chris 
   1148   6689     maybee 	/* if it is successful, dsl_dir_destroy_sync will close the dd */
   1149   5367     ahrens 	if (err)
   1150   2199     ahrens 		dsl_dir_close(dd, FTAG);
   1151   5367     ahrens out:
   1152   6689     maybee 	dsl_dataset_disown(ds, tag);
   1153    789     ahrens 	return (err);
   1154    789     ahrens }
   1155    789     ahrens 
   1156   3547     maybee blkptr_t *
   1157   3547     maybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
   1158    789     ahrens {
   1159   3547     maybee 	return (&ds->ds_phys->ds_bp);
   1160    789     ahrens }
   1161    789     ahrens 
   1162    789     ahrens void
   1163    789     ahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
   1164    789     ahrens {
   1165    789     ahrens 	ASSERT(dmu_tx_is_syncing(tx));
   1166    789     ahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
   1167    789     ahrens 	if (ds == NULL) {
   1168    789     ahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
   1169    789     ahrens 	} else {
   1170    789     ahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
   1171    789     ahrens 		ds->ds_phys->ds_bp = *bp;
   1172    789     ahrens 	}
   1173    789     ahrens }
   1174    789     ahrens 
   1175    789     ahrens spa_t *
   1176    789     ahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
   1177    789     ahrens {
   1178    789     ahrens 	return (ds->ds_dir->dd_pool->dp_spa);
   1179    789     ahrens }
   1180    789     ahrens 
   1181    789     ahrens void
   1182    789     ahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
   1183    789     ahrens {
   1184    789     ahrens 	dsl_pool_t *dp;
   1185    789     ahrens 
   1186    789     ahrens 	if (ds == NULL) /* this is the meta-objset */
   1187    789     ahrens 		return;
   1188    789     ahrens 
   1189  10298    Matthew 	ASSERT(ds->ds_objset != NULL);
   1190   2885     ahrens 
   1191   2885     ahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
   1192   2885     ahrens 		panic("dirtying snapshot!");
   1193    789     ahrens 
   1194    789     ahrens 	dp = ds->ds_dir->dd_pool;
   1195    789     ahrens 
   1196    789     ahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
   1197    789     ahrens 		/* up the hold count until we can be written out */
   1198    789     ahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
   1199    789     ahrens 	}
   1200    789     ahrens }
   1201    789     ahrens 
   1202   5378   ck153898 /*
   1203   5378   ck153898  * The unique space in the head dataset can be calculated by subtracting
   1204   5378   ck153898  * the space used in the most recent snapshot, that is still being used
   1205   5378   ck153898  * in this file system, from the space currently in use.  To figure out
   1206   5378   ck153898  * the space in the most recent snapshot still in use, we need to take
   1207   5378   ck153898  * the total space used in the snapshot and subtract out the space that
   1208   5378   ck153898  * has been freed up since the snapshot was taken.
   1209   5378   ck153898  */
   1210   5378   ck153898 static void
   1211   5378   ck153898 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
   1212   5378   ck153898 {
   1213   5378   ck153898 	uint64_t mrs_used;
   1214   5378   ck153898 	uint64_t dlused, dlcomp, dluncomp;
   1215   5378   ck153898 
   1216   5378   ck153898 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
   1217   5378   ck153898 
   1218   5378   ck153898 	if (ds->ds_phys->ds_prev_snap_obj != 0)
   1219   5378   ck153898 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
   1220   5378   ck153898 	else
   1221   5378   ck153898 		mrs_used = 0;
   1222   5378   ck153898 
   1223   5378   ck153898 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
   1224   5378   ck153898 	    &dluncomp));
   1225   5378   ck153898 
   1226   5378   ck153898 	ASSERT3U(dlused, <=, mrs_used);
   1227   5378   ck153898 	ds->ds_phys->ds_unique_bytes =
   1228   5378   ck153898 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
   1229   5378   ck153898 
   1230   5378   ck153898 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
   1231   5378   ck153898 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
   1232   5378   ck153898 	    SPA_VERSION_UNIQUE_ACCURATE)
   1233   5378   ck153898 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
   1234   5378   ck153898 }
   1235   5378   ck153898 
   1236   5378   ck153898 static uint64_t
   1237   5378   ck153898 dsl_dataset_unique(dsl_dataset_t *ds)
   1238   5378   ck153898 {
   1239   5378   ck153898 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
   1240   5378   ck153898 		dsl_dataset_recalc_head_uniq(ds);
   1241   5378   ck153898 
   1242   5378   ck153898 	return (ds->ds_phys->ds_unique_bytes);
   1243   5378   ck153898 }
   1244   5378   ck153898 
   1245    789     ahrens struct killarg {
   1246   7390    Matthew 	dsl_dataset_t *ds;
   1247    789     ahrens 	dmu_tx_t *tx;
   1248    789     ahrens };
   1249    789     ahrens 
   1250   7390    Matthew /* ARGSUSED */
   1251    789     ahrens static int
   1252  10922       Jeff kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
   1253  10922       Jeff     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
   1254    789     ahrens {
   1255    789     ahrens 	struct killarg *ka = arg;
   1256  10922       Jeff 	dmu_tx_t *tx = ka->tx;
   1257    789     ahrens 
   1258   7837    Matthew 	if (bp == NULL)
   1259   7837    Matthew 		return (0);
   1260    789     ahrens 
   1261  10922       Jeff 	if (zb->zb_level == ZB_ZIL_LEVEL) {
   1262  10922       Jeff 		ASSERT(zilog != NULL);
   1263   8746    Matthew 		/*
   1264   8746    Matthew 		 * It's a block in the intent log.  It has no
   1265   8746    Matthew 		 * accounting, so just free it.
   1266   8746    Matthew 		 */
   1267  10922       Jeff 		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
   1268   8746    Matthew 	} else {
   1269  10922       Jeff 		ASSERT(zilog == NULL);
   1270   8746    Matthew 		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
   1271  10922       Jeff 		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
   1272   8746    Matthew 	}
   1273   7390    Matthew 
   1274    789     ahrens 	return (0);
   1275   1731    bonwick }
   1276   1731    bonwick 
   1277   1731    bonwick /* ARGSUSED */
   1278   1731    bonwick static int
   1279   2199     ahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
   1280   1731    bonwick {
   1281   2199     ahrens 	dsl_dataset_t *ds = arg1;
   1282   5367     ahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
   1283   5367     ahrens 	uint64_t count;
   1284   5367     ahrens 	int err;
   1285   1731    bonwick 
   1286   1731    bonwick 	/*
   1287   1731    bonwick 	 * Can't delete a head dataset if there are snapshots of it.
   1288   1731    bonwick 	 * (Except if the only snapshots are from the branch we cloned
   1289   1731    bonwick 	 * from.)
   1290   1731    bonwick 	 */
   1291   1731    bonwick 	if (ds->ds_prev != NULL &&
   1292   1731    bonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
   1293  10816  Vitezslav 		return (EBUSY);
   1294   5367     ahrens 
   1295   5367     ahrens 	/*
   1296   5367     ahrens 	 * This is really a dsl_dir thing, but check it here so that
   1297   5367     ahrens 	 * we'll be less likely to leave this dataset inconsistent &
   1298   5367     ahrens 	 * nearly destroyed.
   1299   5367     ahrens 	 */
   1300   5367     ahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
   1301   5367     ahrens 	if (err)
   1302   5367     ahrens 		return (err);
   1303   5367     ahrens 	if (count != 0)
   1304   5367     ahrens 		return (EEXIST);
   1305   1731    bonwick 
   1306   2199     ahrens 	return (0);
   1307   2199     ahrens }
   1308   2199     ahrens 
   1309   2199     ahrens /* ARGSUSED */
   1310   2199     ahrens static void
   1311   4543      marks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
   1312   2199     ahrens {
   1313   2199     ahrens 	dsl_dataset_t *ds = arg1;
   1314   4543      marks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
   1315   2199     ahrens 
   1316   1731    bonwick 	/* Mark it as inconsistent on-disk, in case we crash */
   1317   1731    bonwick 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
   1318   2082   eschrock 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
   1319   4543      marks 
   1320   4543      marks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
   1321   4543      marks 	    cr, "dataset = %llu", ds->ds_object);
   1322    789     ahrens }
   1323    789     ahrens 
   1324  10242      chris static int
   1325  10242      chris dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
   1326  10242      chris     dmu_tx_t *tx)
   1327  10242      chris {
   1328  10242      chris 	dsl_dataset_t *ds = dsda->ds;
   1329  10242      chris 	dsl_dataset_t *ds_prev = ds->ds_prev;
   1330  10242      chris 
   1331  10242      chris 	if (dsl_dataset_might_destroy_origin(ds_prev)) {
   1332  10242      chris 		struct dsl_ds_destroyarg ndsda = {0};
   1333  10242      chris 
   1334  10242      chris 		/*
   1335  10242      chris 		 * If we're not prepared to remove the origin, don't remove
   1336  10242      chris 		 * the clone either.
   1337  10242      chris 		 */
   1338  10242      chris 		if (dsda->rm_origin == NULL) {
   1339  10242      chris 			dsda->need_prep = B_TRUE;
   1340  10242      chris 			return (EBUSY);
   1341  10242      chris 		}
   1342  10242      chris 
   1343  10242      chris 		ndsda.ds = ds_prev;
   1344  10242      chris 		ndsda.is_origin_rm = B_TRUE;
   1345  10242      chris 		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
   1346  10242      chris 	}
   1347  10242      chris 
   1348  10242      chris 	/*
   1349  10242      chris 	 * If we're not going to remove the origin after all,
   1350  10242      chris 	 * undo the open context setup.
   1351  10242      chris 	 */
   1352  10242      chris 	if (dsda->rm_origin != NULL) {
   1353  10242      chris 		dsl_dataset_disown(dsda->rm_origin, tag);
   1354  10242      chris 		dsda->rm_origin = NULL;
   1355  10242      chris 	}
   1356  10242      chris 
   1357  10242      chris 	return (0);
   1358  10242      chris }
   1359  10242      chris 
   1360   2199     ahrens /* ARGSUSED */
   1361   5367     ahrens int
   1362   2199     ahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
   1363    789     ahrens {
   1364  10242      chris 	struct dsl_ds_destroyarg *dsda = arg1;
   1365  10242      chris 	dsl_dataset_t *ds = dsda->ds;
   1366    789     ahrens 
   1367   6689     maybee 	/* we have an owner hold, so noone else can destroy us */
   1368   6689     maybee 	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
   1369   6689     maybee 
   1370  10242      chris 	/*
   1371  10242      chris 	 * Only allow deferred destroy on pools that support it.
   1372  10242      chris 	 * NOTE: deferred destroy is only supported on snapshots.
   1373  10242      chris 	 */
   1374  10242      chris 	if (dsda->defer) {
   1375  10242      chris 		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
   1376  10242      chris 		    SPA_VERSION_USERREFS)
   1377  10242      chris 			return (ENOTSUP);
   1378  10242      chris 		ASSERT(dsl_dataset_is_snapshot(ds));
   1379  10242      chris 		return (0);
   1380  10242      chris 	}
   1381    789     ahrens 
   1382    789     ahrens 	/*
   1383    789     ahrens 	 * Can't delete a head dataset if there are snapshots of it.
   1384    789     ahrens 	 * (Except if the only snapshots are from the branch we cloned
   1385    789     ahrens 	 * from.)
   1386    789     ahrens 	 */
   1387    789     ahrens 	if (ds->ds_prev != NULL &&
   1388   2199     ahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
   1389  10816  Vitezslav 		return (EBUSY);
   1390    789     ahrens 
   1391    789     ahrens 	/*
   1392    789     ahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
   1393    789     ahrens 	 * them.  Try again.
   1394    789     ahrens 	 */
   1395   2199     ahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
   1396    789     ahrens 		return (EAGAIN);
   1397  10242      chris 
   1398  10242      chris 	if (dsl_dataset_is_snapshot(ds)) {
   1399  10242      chris 		/*
   1400  10242      chris 		 * If this snapshot has an elevated user reference count,
   1401  10242      chris 		 * we can't destroy it yet.
   1402  10242      chris 		 */
   1403  10242      chris 		if (ds->ds_userrefs > 0 && !dsda->releasing)
   1404  10242      chris 			return (EBUSY);
   1405  10242      chris 
   1406  10242      chris 		mutex_enter(&ds->ds_lock);
   1407  10242      chris 		/*
   1408  10242      chris 		 * Can't delete a branch point. However, if we're destroying
   1409  10242      chris 		 * a clone and removing its origin due to it having a user
   1410  10242      chris 		 * hold count of 0 and having been marked for deferred destroy,
   1411  10242      chris 		 * it's OK for the origin to have a single clone.
   1412  10242      chris 		 */
   1413  10242      chris 		if (ds->ds_phys->ds_num_children >
   1414  10242      chris 		    (dsda->is_origin_rm ? 2 : 1)) {
   1415  10242      chris 			mutex_exit(&ds->ds_lock);
   1416  10242      chris 			return (EEXIST);
   1417  10242      chris 		}
   1418  10242      chris 		mutex_exit(&ds->ds_lock);
   1419  10242      chris 	} else if (dsl_dir_is_clone(ds->ds_dir)) {
   1420  10242      chris 		return (dsl_dataset_origin_check(dsda, arg2, tx));
   1421  10242      chris 	}
   1422   2199     ahrens 
   1423   2199     ahrens 	/* XXX we should do some i/o error checking... */
   1424   2199     ahrens 	return (0);
   1425   2199     ahrens }
   1426   2199     ahrens 
   1427   6689     maybee struct refsarg {
   1428   6689     maybee 	kmutex_t lock;
   1429   6689     maybee 	boolean_t gone;
   1430   6689     maybee 	kcondvar_t cv;
   1431   6689     maybee };
   1432   6689     maybee 
   1433   6689     maybee /* ARGSUSED */
   1434   6689     maybee static void
   1435   6689     maybee dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
   1436   6689     maybee {
   1437   6689     maybee 	struct refsarg *arg = argv;
   1438   6689     maybee 
   1439   6689     maybee 	mutex_enter(&arg->lock);
   1440   6689     maybee 	arg->gone = TRUE;
   1441   6689     maybee 	cv_signal(&arg->cv);
   1442   6689     maybee 	mutex_exit(&arg->lock);
   1443   6689     maybee }
   1444   6689     maybee 
   1445   6689     maybee static void
   1446   6689     maybee dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
   1447   6689     maybee {
   1448   6689     maybee 	struct refsarg arg;
   1449   6689     maybee 
   1450   6689     maybee 	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
   1451   6689     maybee 	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
   1452   6689     maybee 	arg.gone = FALSE;
   1453   6689     maybee 	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
   1454   6689     maybee 	    dsl_dataset_refs_gone);
   1455   6689     maybee 	dmu_buf_rele(ds->ds_dbuf, tag);
   1456   6689     maybee 	mutex_enter(&arg.lock);
   1457   6689     maybee 	while (!arg.gone)
   1458   6689     maybee 		cv_wait(&arg.cv, &arg.lock);
   1459   6689     maybee 	ASSERT(arg.gone);
   1460   6689     maybee 	mutex_exit(&arg.lock);
   1461   6689     maybee 	ds->ds_dbuf = NULL;
   1462   6689     maybee 	ds->ds_phys = NULL;
   1463   6689     maybee 	mutex_destroy(&arg.lock);
   1464   6689     maybee 	cv_destroy(&arg.cv);
   1465   6689     maybee }
   1466   6689     maybee 
   1467  10801    Matthew static void
   1468  10801    Matthew remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
   1469  10801    Matthew {
   1470  10801    Matthew 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
   1471  10801    Matthew 	uint64_t count;
   1472  10801    Matthew 	int err;
   1473  10801    Matthew 
   1474  10801    Matthew 	ASSERT(ds->ds_phys->ds_num_children >= 2);
   1475  10801    Matthew 	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
   1476  10801    Matthew 	/*
   1477  10801    Matthew 	 * The err should not be ENOENT, but a bug in a previous version
   1478  10801    Matthew 	 * of the code could cause upgrade_clones_cb() to not set
   1479  10801    Matthew 	 * ds_next_snap_obj when it should, leading to a missing entry.
   1480  10801    Matthew 	 * If we knew that the pool was created after
   1481  10801    Matthew 	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
   1482  10801    Matthew 	 * ENOENT.  However, at least we can check that we don't have
   1483  10801    Matthew 	 * too many entries in the next_clones_obj even after failing to
   1484  10801    Matthew 	 * remove this one.
   1485  10801    Matthew 	 */
   1486  10801    Matthew 	if (err != ENOENT) {
   1487  10801    Matthew 		VERIFY3U(err, ==, 0);
   1488  10801    Matthew 	}
   1489  10801    Matthew 	ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
   1490  10801    Matthew 	    &count));
   1491  10801    Matthew 	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
   1492  10801    Matthew }
   1493  10801    Matthew 
   /*
    * Sync half of the destroy sync task: destroy the dataset dsda->ds in
    * transaction tx.
    *
    *   arg1  struct dsl_ds_destroyarg describing the request
    *   tag   hold tag handed to dsl_dataset_drain_refs() and to the
    *         recursive call that removes the origin
    *   cr    credentials passed on to dsl_dataset_set_reservation_sync()
    *         and spa_history_internal_log()
    *   tx    the syncing transaction
    *
    * For a deferred destroy (dsda->defer) of a dataset that still has user
    * references or more than one child, the only effect is to set
    * DS_FLAG_DEFER_DESTROY.  Otherwise the dataset is torn down: its
    * owner is switched to dsl_reaper, any refreservation is dropped, the
    * previous and next snapshots are relinked around it, its deadlist is
    * either merged into the next snapshot or (for a head dataset)
    * destroyed along with every block born after the previous snapshot,
    * its name is removed, and its MOS objects are freed.  If
    * dsda->rm_origin is set, that dataset is then destroyed by a
    * recursive call.
    *
    * The pool's dp_config_rwlock is asserted to be write-held.
    */
   1494   5367     ahrens void
   1495   4543      marks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
   1496   2199     ahrens {
   1497  10242      chris 	struct dsl_ds_destroyarg *dsda = arg1;
   1498  10242      chris 	dsl_dataset_t *ds = dsda->ds;
   1499   2199     ahrens 	int err;
   1500   2199     ahrens 	int after_branch_point = FALSE;	/* ds_prev's next snap is not ds */
   1501   2199     ahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
   1502   2199     ahrens 	objset_t *mos = dp->dp_meta_objset;
   1503   2199     ahrens 	dsl_dataset_t *ds_prev = NULL;
   1504   2199     ahrens 	uint64_t obj;			/* copy of ds->ds_object */
   1505   2199     ahrens 
   1506   6689     maybee 	ASSERT(ds->ds_owner);
   1507  10242      chris 	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
   1508   2199     ahrens 	ASSERT(ds->ds_prev == NULL ||
   1509   2199     ahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
   1510   2199     ahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
   1511  10242      chris 
   	/*
   	 * Deferred destroy: if the dataset is still held or still has
   	 * clones, just flag it and leave the real work for later.
   	 */
   1512  10242      chris 	if (dsda->defer) {
   1513  10242      chris 		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
   1514  10242      chris 		if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
   1515  10242      chris 			dmu_buf_will_dirty(ds->ds_dbuf, tx);
   1516  10242      chris 			ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
   1517  10242      chris 			return;
   1518  10242      chris 		}
   1519  10242      chris 	}
   1520   6689     maybee 
   1521   6689     maybee 	/* signal any waiters that this dataset is going away */
   1522   6689     maybee 	mutex_enter(&ds->ds_lock);
   1523   6689     maybee 	ds->ds_owner = dsl_reaper;
   1524   6689     maybee 	cv_broadcast(&ds->ds_exclusive_cv);
   1525   6689     maybee 	mutex_exit(&ds->ds_lock);
   1526   5378   ck153898 
   1527   5378   ck153898 	/* Remove our reservation */
   1528   5378   ck153898 	if (ds->ds_reserved != 0) {
   1529  11022        Tom 		dsl_prop_setarg_t psa;
   1530  11022        Tom 		uint64_t value = 0;
   1531  11022        Tom 
   1532  11022        Tom 		dsl_prop_setarg_init_uint64(&psa, "refreservation",
   1533  11022        Tom 		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
   1534  11022        Tom 		    &value);
   1535  11022        Tom 		psa.psa_effective_value = 0;	/* predict default value */
   1536  11022        Tom 
   1537  11022        Tom 		dsl_dataset_set_reservation_sync(ds, &psa, cr, tx);
   1538   5378   ck153898 		ASSERT3U(ds->ds_reserved, ==, 0);
   1539   5378   ck153898 	}
   1540   2199     ahrens 
   1541   2199     ahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
   1542   2199     ahrens 
   1543   7046     ahrens 	dsl_pool_ds_destroyed(ds, tx);
   1544   7046     ahrens 
   1545   2199     ahrens 	obj = ds->ds_object;
   1546    789     ahrens 
   	/*
   	 * Fix up the previous snapshot's forward references
   	 * (ds_next_snap_obj, next_clones ZAP, ds_num_children) now that
   	 * this dataset is going away.  ds_prev is borrowed from
   	 * ds->ds_prev when that is set, otherwise held here with FTAG.
   	 */
   1547    789     ahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
   1548    789     ahrens 		if (ds->ds_prev) {
   1549    789     ahrens 			ds_prev = ds->ds_prev;
   1550    789     ahrens 		} else {
   1551   6689     maybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
   1552   6689     maybee 			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
   1553    789     ahrens 		}
   1554    789     ahrens 		after_branch_point =
   1555    789     ahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
   1556    789     ahrens 
   1557    789     ahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
   1558   7046     ahrens 		if (after_branch_point &&
   1559   7046     ahrens 		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
   1560  10801    Matthew 			remove_from_next_clones(ds_prev, obj, tx);
   1561   7046     ahrens 			if (ds->ds_phys->ds_next_snap_obj != 0) {
   1562   7046     ahrens 				VERIFY(0 == zap_add_int(mos,
   1563   7046     ahrens 				    ds_prev->ds_phys->ds_next_clones_obj,
   1564   7046     ahrens 				    ds->ds_phys->ds_next_snap_obj, tx));
   1565   7046     ahrens 			}
   1566   7046     ahrens 		}
   1567    789     ahrens 		if (after_branch_point &&
   1568    789     ahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
   1569    789     ahrens 			/* This clone is toast. */
   1570    789     ahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
   1571    789     ahrens 			ds_prev->ds_phys->ds_num_children--;
   1572  10242      chris 
   1573  10242      chris 			/*
   1574  10242      chris 			 * If the clone's origin has no other clones, no
   1575  10242      chris 			 * user holds, and has been marked for deferred
   1576  10242      chris 			 * deletion, then we should have done the necessary
   1577  10242      chris 			 * destroy setup for it.
   1578  10242      chris 			 */
   1579  10242      chris 			if (ds_prev->ds_phys->ds_num_children == 1 &&
   1580  10242      chris 			    ds_prev->ds_userrefs == 0 &&
   1581  10242      chris 			    DS_IS_DEFER_DESTROY(ds_prev)) {
   1582  10242      chris 				ASSERT3P(dsda->rm_origin, !=, NULL);
   1583  10242      chris 			} else {
   1584  10242      chris 				ASSERT3P(dsda->rm_origin, ==, NULL);
   1585  10242      chris 			}
   1586    789     ahrens 		} else if (!after_branch_point) {
   1587    789     ahrens 			ds_prev->ds_phys->ds_next_snap_obj =
   1588    789     ahrens 			    ds->ds_phys->ds_next_snap_obj;
   1589    789     ahrens 		}
   1590    789     ahrens 	}
   1591    789     ahrens 
   	/*
   	 * A non-zero ds_next_snap_obj means something follows us in the
   	 * snapshot chain: splice ourselves out and hand our deadlist to
   	 * it.  Otherwise this is a head dataset and its blocks are freed.
   	 */
   1592    789     ahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
   1593   2199     ahrens 		blkptr_t bp;
   1594    789     ahrens 		dsl_dataset_t *ds_next;
   1595    789     ahrens 		uint64_t itor = 0;
   1596   5378   ck153898 		uint64_t old_unique;
   1597   7390    Matthew 		int64_t used = 0, compressed = 0, uncompressed = 0;
   1598    789     ahrens 
   1599   6689     maybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
   1600   6689     maybee 		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
   1601    789     ahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
   1602   5378   ck153898 
   1603   5378   ck153898 		old_unique = dsl_dataset_unique(ds_next);
   1604    789     ahrens 
   1605    789     ahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
   1606    789     ahrens 		ds_next->ds_phys->ds_prev_snap_obj =
   1607    789     ahrens 		    ds->ds_phys->ds_prev_snap_obj;
   1608    789     ahrens 		ds_next->ds_phys->ds_prev_snap_txg =
   1609    789     ahrens 		    ds->ds_phys->ds_prev_snap_txg;
   1610    789     ahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
   1611    789     ahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
   1612    789     ahrens 
   1613    789     ahrens 		/*
   1614    789     ahrens 		 * Transfer to our deadlist (which will become next's
   1615    789     ahrens 		 * new deadlist) any entries from next's current
   1616    789     ahrens 		 * deadlist which were born before prev, and free the
   1617    789     ahrens 		 * other entries.
   1618    789     ahrens 		 *
   1619    789     ahrens 		 * XXX we're doing this long task with the config lock held
   1620    789     ahrens 		 */
   1621   6689     maybee 		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
   1622    789     ahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
   1623   1544   eschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
   1624   1544   eschrock 				    &bp, tx));
   1625    789     ahrens 				if (ds_prev && !after_branch_point &&
   1626    789     ahrens 				    bp.blk_birth >
   1627    789     ahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
   1628    789     ahrens 					ds_prev->ds_phys->ds_unique_bytes +=
   1629  10922       Jeff 					    bp_get_dsize_sync(dp->dp_spa, &bp);
   1630    789     ahrens 				}
   1631    789     ahrens 			} else {
   1632  10922       Jeff 				used += bp_get_dsize_sync(dp->dp_spa, &bp);
   1633    789     ahrens 				compressed += BP_GET_PSIZE(&bp);
   1634    789     ahrens 				uncompressed += BP_GET_UCSIZE(&bp);
   1635  10922       Jeff 				dsl_free(dp, tx->tx_txg, &bp);
   1636    789     ahrens 			}
   1637    789     ahrens 		}
   1638    789     ahrens 
   1639   7390    Matthew 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
   1640   7390    Matthew 
   1641   7390    Matthew 		/* change snapused */
   1642   7390    Matthew 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
   1643   7390    Matthew 		    -used, -compressed, -uncompressed, tx);
   1644   7390    Matthew 
   1645    789     ahrens 		/* free next's deadlist */
   1646    789     ahrens 		bplist_close(&ds_next->ds_deadlist);
   1647    789     ahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
   1648    789     ahrens 
   1649    789     ahrens 		/* set next's deadlist to our deadlist */
   1650   6689     maybee 		bplist_close(&ds->ds_deadlist);
   1651    789     ahrens 		ds_next->ds_phys->ds_deadlist_obj =
   1652    789     ahrens 		    ds->ds_phys->ds_deadlist_obj;
   1653   1544   eschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
   1654   1544   eschrock 		    ds_next->ds_phys->ds_deadlist_obj));
   1655    789     ahrens 		ds->ds_phys->ds_deadlist_obj = 0;
   1656    789     ahrens 
   1657    789     ahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
   1658    789     ahrens 			/*
   1659    789     ahrens 			 * Update next's unique to include blocks which
   1660    789     ahrens 			 * were previously shared by only this snapshot
   1661    789     ahrens 			 * and it.  Those blocks will be born after the
   1662    789     ahrens 			 * prev snap and before this snap, and will have
   1663    789     ahrens 			 * died after the next snap and before the one
   1664    789     ahrens 			 * after that (ie. be on the snap after next's
   1665    789     ahrens 			 * deadlist).
   1666    789     ahrens 			 *
   1667    789     ahrens 			 * XXX we're doing this long task with the
   1668    789     ahrens 			 * config lock held
   1669    789     ahrens 			 */
   1670    789     ahrens 			dsl_dataset_t *ds_after_next;
   1671   7390    Matthew 			uint64_t space;
   1672    789     ahrens 
   1673   6689     maybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
   1674   6689     maybee 			    ds_next->ds_phys->ds_next_snap_obj,
   1675   6689     maybee 			    FTAG, &ds_after_next));
   1676   7390    Matthew 
   1677   7390    Matthew 			VERIFY(0 ==
   1678   7390    Matthew 			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
   1679   7390    Matthew 			    ds->ds_phys->ds_prev_snap_txg,
   1680   7390    Matthew 			    ds->ds_phys->ds_creation_txg, &space));
   1681   7390    Matthew 			ds_next->ds_phys->ds_unique_bytes += space;
   1682    789     ahrens 
   1683   6689     maybee 			dsl_dataset_rele(ds_after_next, FTAG);
   1684    789     ahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
   1685    789     ahrens 		} else {
   			/*
   			 * ds_next has no next snapshot of its own.  Repoint
   			 * its ds_prev reference from us to our previous
   			 * snapshot (if any) and recompute its unique space.
   			 */
   1686    789     ahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
   1687   6689     maybee 			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
   1688   6689     maybee 			ds_next->ds_prev = NULL;
   1689    789     ahrens 			if (ds_prev) {
   1690   6689     maybee 				VERIFY(0 == dsl_dataset_get_ref(dp,
   1691   6689     maybee 				    ds->ds_phys->ds_prev_snap_obj,
   1692   6689     maybee 				    ds_next, &ds_next->ds_prev));
   1693    789     ahrens 			}
   1694   5378   ck153898 
   1695   5378   ck153898 			dsl_dataset_recalc_head_uniq(ds_next);
   1696   5378   ck153898 
   1697   5378   ck153898 			/*
   1698   5378   ck153898 			 * Reduce the amount of our unconsumed refreservation
   1699   5378   ck153898 			 * being charged to our parent by the amount of
   1700   5378   ck153898 			 * new unique data we have gained.
   1701   5378   ck153898 			 */
   1702   5378   ck153898 			if (old_unique < ds_next->ds_reserved) {
   1703   5378   ck153898 				int64_t mrsdelta;
   1704   5378   ck153898 				uint64_t new_unique =
   1705   5378   ck153898 				    ds_next->ds_phys->ds_unique_bytes;
   1706   5378   ck153898 
   1707   5378   ck153898 				ASSERT(old_unique <= new_unique);
   1708   5378   ck153898 				mrsdelta = MIN(new_unique - old_unique,
   1709   5378   ck153898 				    ds_next->ds_reserved - old_unique);
   1710   7390    Matthew 				dsl_dir_diduse_space(ds->ds_dir,
   1711   7390    Matthew 				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
   1712   5378   ck153898 			}
   1713    789     ahrens 		}
   1714   6689     maybee 		dsl_dataset_rele(ds_next, FTAG);
   1715    789     ahrens 	} else {
   1716    789     ahrens 		/*
   1717    789     ahrens 		 * There's no next snapshot, so this is a head dataset.
   1718    789     ahrens 		 * Destroy the deadlist.  Unless it's a clone, the
   1719    789     ahrens 		 * deadlist should be empty.  (If it's a clone, it's
   1720    789     ahrens 		 * safe to ignore the deadlist contents.)
   1721    789     ahrens 		 */
   1722    789     ahrens 		struct killarg ka;
   1723    789     ahrens 
   1724    789     ahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
   1725    789     ahrens 		bplist_close(&ds->ds_deadlist);
   1726    789     ahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
   1727    789     ahrens 		ds->ds_phys->ds_deadlist_obj = 0;
   1728    789     ahrens 
   1729    789     ahrens 		/*
   1730    789     ahrens 		 * Free everything that we point to (that's born after
   1731    789     ahrens 		 * the previous snapshot, if we are a clone)
   1732    789     ahrens 		 *
   1733   7390    Matthew 		 * NB: this should be very quick, because we already
   1734   7390    Matthew 		 * freed all the objects in open context.
   1735    789     ahrens 		 */
   1736   7390    Matthew 		ka.ds = ds;
   1737    789     ahrens 		ka.tx = tx;
   1738   7837    Matthew 		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
   1739   7837    Matthew 		    TRAVERSE_POST, kill_blkptr, &ka);
   1740    789     ahrens 		ASSERT3U(err, ==, 0);
   1741   9390      chris 		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
   1742   7390    Matthew 		    ds->ds_phys->ds_unique_bytes == 0);
   1743  10342      chris 
   		/*
   		 * Release the hold on ds_prev taken with tag ds.  ds_prev
   		 * is cleared too so the FTAG release further down is
   		 * skipped.
   		 */
   1744  10342      chris 		if (ds->ds_prev != NULL) {
   1745  10342      chris 			dsl_dataset_rele(ds->ds_prev, ds);
   1746  10342      chris 			ds->ds_prev = ds_prev = NULL;
   1747  10342      chris 		}
   1748    789     ahrens 	}
   1749    789     ahrens 
   1750   6689     maybee 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
   1751   6689     maybee 		/* Erase the link in the dir */
   1752   6689     maybee 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
   1753   6689     maybee 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
   1754   6689     maybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
   1755    789     ahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
   1756    789     ahrens 		ASSERT(err == 0);
   1757    789     ahrens 	} else {
   1758    789     ahrens 		/* remove from snapshot namespace */
   1759    789     ahrens 		dsl_dataset_t *ds_head;
   1760   6689     maybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
   1761   6689     maybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
   1762   6689     maybee 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
   1763   2207     ahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
   1764    789     ahrens #ifdef ZFS_DEBUG
   1765    789     ahrens 		{
   1766    789     ahrens 			uint64_t val;
   1767   6492       timh 
   1768   6689     maybee 			err = dsl_dataset_snap_lookup(ds_head,
   1769   6492       timh 			    ds->ds_snapname, &val);
   1770    789     ahrens 			ASSERT3U(err, ==, 0);
   1771    789     ahrens 			ASSERT3U(val, ==, obj);
   1772    789     ahrens 		}
   1773    789     ahrens #endif
   1774   6689     maybee 		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
   1775    789     ahrens 		ASSERT(err == 0);
   1776   6689     maybee 		dsl_dataset_rele(ds_head, FTAG);
   1777    789     ahrens 	}
   1778    789     ahrens 
   	/* Only release ds_prev if it was held above with FTAG. */
   1779    789     ahrens 	if (ds_prev && ds->ds_prev != ds_prev)
   1780   6689     maybee 		dsl_dataset_rele(ds_prev, FTAG);
   1781    789     ahrens 
   1782   5094      lling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
   1783   4543      marks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
   1784   4543      marks 	    cr, "dataset = %llu", ds->ds_object);
   1785   4543      marks 
   	/* Free the auxiliary MOS objects that hang off this dataset. */
   1786   7046     ahrens 	if (ds->ds_phys->ds_next_clones_obj != 0) {
   1787   7046     ahrens 		uint64_t count;
   1788   7046     ahrens 		ASSERT(0 == zap_count(mos,
   1789   7046     ahrens 		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
   1790   7046     ahrens 		VERIFY(0 == dmu_object_free(mos,
   1791   7046     ahrens 		    ds->ds_phys->ds_next_clones_obj, tx));
   1792   7265     ahrens 	}
   1793   7390    Matthew 	if (ds->ds_phys->ds_props_obj != 0)
   1794   7390    Matthew 		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
   1795  10242      chris 	if (ds->ds_phys->ds_userrefs_obj != 0)
   1796  10242      chris 		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
   	/*
   	 * Close the dir, wait in dsl_dataset_drain_refs() (which clears
   	 * ds_dbuf and ds_phys), and finally free the dataset object using
   	 * the object number saved in obj.
   	 */
   1797   6689     maybee 	dsl_dir_close(ds->ds_dir, ds);
   1798   6689     maybee 	ds->ds_dir = NULL;
   1799   6689     maybee 	dsl_dataset_drain_refs(ds, tag);
   1800   2199     ahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
   1801  10242      chris 
   1802  10242      chris 	if (dsda->rm_origin) {
   1803  10242      chris 		/*
   1804  10242      chris 		 * Remove the origin of the clone we just destroyed.
   1805  10242      chris 		 */
   1806  10242      chris 		struct dsl_ds_destroyarg ndsda = {0};
   1807  10242      chris 
   1808  10342      chris 		ndsda.ds = dsda->rm_origin;
   1809  10242      chris 		dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
   1810  10242      chris 	}
   1811   2199     ahrens }
   1812   2199     ahrens 
   1813   5378   ck153898 static int
   1814   5378   ck153898 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
   1815   5378   ck153898 {
   1816   5378   ck153898 	uint64_t asize;
   1817   5378   ck153898 
   1818   5378   ck153898 	if (!dmu_tx_is_syncing(tx))
   1819   5378   ck153898 		return (0);
   1820   5378   ck153898 
   1821   5378   ck153898 	/*
   1822   5378   ck153898 	 * If there's an fs-only reservation, any blocks that might become
   1823   5378   ck153898 	 * owned by the snapshot dataset must be accommodated by space
   1824   5378   ck153898 	 * outside of the reservation.
   1825   5378   ck153898 	 */
   1826   5378   ck153898 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
   1827   5378   ck153898 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
   1828   5378   ck153898 		return (ENOSPC);
   1829   5378   ck153898 
   1830   5378   ck153898 	/*
   1831   5378   ck153898 	 * Propogate any reserved space for this snapshot to other
   1832   5378   ck153898 	 * snapshot checks in this sync group.
   1833   5378   ck153898 	 */
   1834   5378   ck153898 	if (asize > 0)
   1835   5378   ck153898 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
   1836   5378   ck153898 
   1837   5378   ck153898 	return (0);
   1838   5378   ck153898 }
   1839   5378   ck153898 
   1840   2199     ahrens /* ARGSUSED */
   1841   2199     ahrens int
   1842   2199     ahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
   1843   2199     ahrens {
   1844   5367     ahrens 	dsl_dataset_t *ds = arg1;
   1845   2199     ahrens 	const char *snapname = arg2;
   1846   2199     ahrens 	int err;
   1847   2199     ahrens 	uint64_t value;
   1848    789     ahrens 
   1849    789     ahrens 	/*
   1850   2199     ahrens 	 * We don't allow multiple snapshots of the same txg.  If there
   1851   2199     ahrens 	 * is already one, try again.
   1852    789     ahrens 	 */
   1853   2199     ahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
   1854   2199     ahrens 		return (EAGAIN);
   1855    789     ahrens 
   1856   2199     ahrens 	/*
   1857   2199     ahrens 	 * Check for conflicting name snapshot name.
   1858   2199     ahrens 	 */
   1859   6689     maybee 	err = dsl_dataset_snap_lookup(ds, snapname, &value);
   1860   2199     ahrens 	if (err == 0)
   1861   2199     ahrens 		return (EEXIST);
   1862   2199     ahrens 	if (err != ENOENT)
   1863   2199     ahrens 		return (err);
   1864   3978   mmusante 
   1865   3978   mmusante 	/*
   1866   3978   mmusante 	 * Check that the dataset's name is not too long.  Name consists
   1867   3978   mmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
   1868   3978   mmusante 	 */
   1869   3978   mmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
   1870   3978   mmusante 		return (ENAMETOOLONG);
   1871   5378   ck153898 
   1872   5378   ck153898 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
   1873   5378   ck153898 	if (err)
   1874   5378   ck153898 		return (err);
   1875   2199     ahrens 
   1876   2199     ahrens 	ds->ds_trysnap_txg = tx->tx_txg;
   1877    789     ahrens 	return (0);
   1878    789     ahrens }
   1879    789     ahrens 
   1880   2199     ahrens void
   1881   4543      marks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
   1882    789     ahrens {
   1883   5367     ahrens 	dsl_dataset_t *ds = arg1;
   1884   2199     ahrens 	const char *snapname = arg2;
   1885   2199     ahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
   1886    789     ahrens 	dmu_buf_t *dbuf;
   1887    789     ahrens 	dsl_dataset_phys_t *dsphys;
   1888   7046     ahrens 	uint64_t dsobj, crtxg;
   1889    789     ahrens 	objset_t *mos = dp->dp_meta_objset;
   1890    789     ahrens 	int err;
   1891    789     ahrens 
   1892   2199     ahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
   1893   7046     ahrens 
   1894   7046     ahrens 	/*
   1895   7046     ahrens 	 * The origin's ds_creation_txg has to be < TXG_INITIAL
   1896   7046     ahrens 	 */
   1897   7046     ahrens 	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
   1898   7046     ahrens 		crtxg = 1;
   1899   7046     ahrens 	else
   1900   7046     ahrens 		crtxg = tx->tx_txg;
   1901    789     ahrens 
   1902    928     tabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
   1903    928     tabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
   1904   1544   eschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
   1905    789     ahrens 	dmu_buf_will_dirty(dbuf, tx);
   1906    789     ahrens 	dsphys = dbuf->db_data;
   1907   6689     maybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
   1908   2199     ahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
   1909    789     ahrens 	dsphys->ds_fsid_guid = unique_create();
   1910    789     ahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
   1911    789     ahrens 	    sizeof (dsphys->ds_guid));
   1912    789     ahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
   1913    789     ahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
   1914    789     ahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
   1915    789     ahrens 	dsphys->ds_num_children = 1;
   1916    789     ahrens 	dsphys->ds_creation_time = gethrestime_sec();
   1917   7046     ahrens 	dsphys->ds_creation_txg = crtxg;
   1918    789     ahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
   1919    789     ahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
   1920    789     ahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
   1921    789     ahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
   1922   2082   eschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
   1923    789     ahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
   1924   1544   eschrock 	dmu_buf_rele(dbuf, FTAG);
   1925    789     ahrens 
   1926   2199     ahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
   1927   2199     ahrens 	if (ds->ds_prev) {
   1928   7046     ahrens 		uint64_t next_clones_obj =
   1929   7046     ahrens 		    ds->ds_prev->ds_phys->ds_next_clones_obj;
   1930   2199     ahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
   1931    789     ahrens 		    ds->ds_object ||
   1932   2199     ahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
   1933   2199     ahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
   1934   2199     ahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
   1935    789     ahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
   1936   2199     ahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
   1937   2199     ahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
   1938   7046     ahrens 		} else if (next_clones_obj != 0) {
   1939  10801    Matthew 			remove_from_next_clones(ds->ds_prev,
   1940  10801    Matthew 			    dsphys->ds_next_snap_obj, tx);
   1941   7046     ahrens 			VERIFY3U(0, ==, zap_add_int(mos,
   1942   7046     ahrens 			    next_clones_obj, dsobj, tx));
   1943    789     ahrens 		}
   1944    789     ahrens 	}
   1945    789     ahrens 
   1946   5378   ck153898 	/*
   1947   5378   ck153898 	 * If we have a reference-reservation on this dataset, we will
   1948   5378   ck153898 	 * need to increase the amount of refreservation being charged
   1949   5378   ck153898 	 * since our unique space is going to zero.
   1950   5378   ck153898 	 */
   1951   5378   ck153898 	if (ds->ds_reserved) {
   1952   5378   ck153898 		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
   1953   7390    Matthew 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
   1954   7390    Matthew 		    add, 0, 0, tx);
   1955   5378   ck153898 	}
   1956   5378   ck153898 
   1957    789     ahrens 	bplist_close(&ds->ds_deadlist);
   1958    789     ahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
   1959   5712     ahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
   1960    789     ahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
   1961   7046     ahrens 	ds->ds_phys->ds_prev_snap_txg = crtxg;
   1962    789     ahrens 	ds->ds_phys->ds_unique_bytes = 0;
   1963   5378   ck153898 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
   1964   5378   ck153898 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
   1965    789     ahrens 	ds->ds_phys->ds_deadlist_obj =
   1966    789     ahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
   1967   1544   eschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
   1968   1544   eschrock 	    ds->ds_phys->ds_deadlist_obj));
   1969    789     ahrens 
   1970    789     ahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
   1971    789     ahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
   1972    789     ahrens 	    snapname, 8, 1, &dsobj, tx);
   1973    789     ahrens 	ASSERT(err == 0);
   1974    789     ahrens 
   1975    789     ahrens 	if (ds->ds_prev)
   1976   6689     maybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
   1977   6689     maybee 	VERIFY(0 == dsl_dataset_get_ref(dp,
   1978   6689     maybee 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
   1979   7046     ahrens 
   1980   7046     ahrens 	dsl_pool_ds_snapshotted(ds, tx);
   1981  10373      chris 
   1982  10373      chris 	dsl_dir_snap_cmtime_update(ds->ds_dir);
   1983   4543      marks 
   1984   4543      marks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
   1985   4603     ahrens 	    "dataset = %llu", dsobj);
   1986    789     ahrens }
   1987    789     ahrens 
   1988    789     ahrens void
   1989   3547     maybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
   1990    789     ahrens {
   1991    789     ahrens 	ASSERT(dmu_tx_is_syncing(tx));
   1992  10298    Matthew 	ASSERT(ds->ds_objset != NULL);
   1993    789     ahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
   1994    789     ahrens 
   1995   4787     ahrens 	/*
   1996   4787     ahrens 	 * in case we had to change ds_fsid_guid when we opened it,
   1997   4787     ahrens 	 * sync it out now.
   1998   4787     ahrens 	 */
   1999   4787     ahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
   2000   4787     ahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
   2001   4787     ahrens 
   2002    789     ahrens 	dsl_dir_dirty(ds->ds_dir, tx);
   2003  10298    Matthew 	dmu_objset_sync(ds->ds_objset, zio, tx);
   2004    789     ahrens }
   2005    789     ahrens 
   2006    789     ahrens void
   2007   2885     ahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
   2008    789     ahrens {
   2009   5378   ck153898 	uint64_t refd, avail, uobjs, aobjs;
   2010   5378   ck153898 
   2011   2885     ahrens 	dsl_dir_stats(ds->ds_dir, nv);
   2012   5378   ck153898 
   2013   5378   ck153898 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
   2014   5378   ck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
   2015   5378   ck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
   2016    789     ahrens 
   2017   2885     ahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
   2018   2885     ahrens 	    ds->ds_phys->ds_creation_time);
   2019   2885     ahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
   2020   2885     ahrens 	    ds->ds_phys->ds_creation_txg);
   2021   5378   ck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
   2022   5378   ck153898 	    ds->ds_quota);
   2023   5378   ck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
   2024   5378   ck153898 	    ds->ds_reserved);
   2025   6643   eschrock 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
   2026   6643   eschrock 	    ds->ds_phys->ds_guid);
   2027  10575       Eric 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
   2028  10575       Eric 	    dsl_dataset_unique(ds));
   2029  10575       Eric 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
   2030  10575       Eric 	    ds->ds_object);
   2031  11022        Tom 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
   2032  11022        Tom 	    ds->ds_userrefs);
   2033  10242      chris 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
   2034  10242      chris 	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
   2035    789     ahrens 
   2036    789     ahrens 	if (ds->ds_phys->ds_next_snap_obj) {
   2037    789     ahrens 		/*
   2038    789     ahrens 		 * This is a snapshot; override the dd's space used with
   2039   2885     ahrens 		 * our unique space and compression ratio.
   2040    789     ahrens 		 */
   2041   2885     ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
   2042   2885     ahrens 		    ds->ds_phys->ds_unique_bytes);
   2043   2885     ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
   2044   2885     ahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
   2045   2885     ahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
   2046   2885     ahrens 		    ds->ds_phys->ds_compressed_bytes));
   2047    789     ahrens 	}
   2048    789     ahrens }
   2049    789     ahrens 
   2050   2885     ahrens void
   2051   2885     ahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
   2052    789     ahrens {
   2053   2885     ahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
   2054   2885     ahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
   2055   5367     ahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
   2056   2885     ahrens 	if (ds->ds_phys->ds_next_snap_obj) {
   2057   2885     ahrens 		stat->dds_is_snapshot = B_TRUE;
   2058   2885     ahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
   2059   8228       Eric 	} else {
   2060   8228       Eric 		stat->dds_is_snapshot = B_FALSE;
   2061   8228       Eric 		stat->dds_num_clones = 0;
   2062   2885     ahrens 	}
   2063   2885     ahrens 
   2064   2885     ahrens 	/* clone origin is really a dsl_dir thing... */
   2065   5446     ahrens 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
   2066   7046     ahrens 	if (dsl_dir_is_clone(ds->ds_dir)) {
   2067   2885     ahrens 		dsl_dataset_t *ods;
   2068   2885     ahrens 
   2069   6689     maybee 		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
   2070   6689     maybee 		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
   2071   5367     ahrens 		dsl_dataset_name(ods, stat->dds_origin);
   2072   6689     maybee 		dsl_dataset_drop_ref(ods, FTAG);
   2073   8228       Eric 	} else {
   2074   8228       Eric 		stat->dds_origin[0] = '\0';
   2075   2885     ahrens 	}
   2076   5446     ahrens 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
   2077   2885     ahrens }
   2078   2885     ahrens 
   2079   2885     ahrens uint64_t
   2080   2885     ahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
   2081   2885     ahrens {
   2082   4787     ahrens 	return (ds->ds_fsid_guid);
   2083   2885     ahrens }
   2084   2885     ahrens 
   2085   2885     ahrens void
   2086   2885     ahrens dsl_dataset_space(dsl_dataset_t *ds,
   2087   2885     ahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
   2088   2885     ahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
   2089   2885     ahrens {
   2090   2885     ahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
   2091   2885     ahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
   2092   5378   ck153898 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
   2093   5378   ck153898 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
   2094   5378   ck153898 	if (ds->ds_quota != 0) {
   2095   5378   ck153898 		/*
   2096   5378   ck153898 		 * Adjust available bytes according to refquota
   2097   5378   ck153898 		 */
   2098   5378   ck153898 		if (*refdbytesp < ds->ds_quota)
   2099   5378   ck153898 			*availbytesp = MIN(*availbytesp,
   2100   5378   ck153898 			    ds->ds_quota - *refdbytesp);
   2101   5378   ck153898 		else
   2102   5378   ck153898 			*availbytesp = 0;
   2103   5378   ck153898 	}
   2104   2885     ahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
   2105   2885     ahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
   2106    789     ahrens }
   2107    789     ahrens 
   2108   5326   ek110237 boolean_t
   2109   5326   ek110237 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
   2110   5326   ek110237 {
   2111   5326   ek110237 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
   2112   5326   ek110237 
   2113   5326   ek110237 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
   2114   5326   ek110237 	    dsl_pool_sync_context(dp));
   2115   5326   ek110237 	if (ds->ds_prev == NULL)
   2116   5326   ek110237 		return (B_FALSE);
   2117   5326   ek110237 	if (ds->ds_phys->ds_bp.blk_birth >
   2118   5326   ek110237 	    ds->ds_prev->ds_phys->ds_creation_txg)
   2119   5326   ek110237 		return (B_TRUE);
   2120   5326   ek110237 	return (B_FALSE);
   2121   5326   ek110237 }
   2122   5326   ek110237 
   2123   2199     ahrens /* ARGSUSED */
   2124   2199     ahrens static int
   2125   2199     ahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
   2126   2199     ahrens {
   2127   2199     ahrens 	dsl_dataset_t *ds = arg1;
   2128   2199     ahrens 	char *newsnapname = arg2;
   2129   2199     ahrens 	dsl_dir_t *dd = ds->ds_dir;
   2130   2199     ahrens 	dsl_dataset_t *hds;
   2131   2199     ahrens 	uint64_t val;
   2132   2199     ahrens 	int err;
   2133    789     ahrens 
   2134   6689     maybee 	err = dsl_dataset_hold_obj(dd->dd_pool,
   2135   6689     maybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
   2136    789     ahrens 	if (err)
   2137    789     ahrens 		return (err);
   2138    789     ahrens 
   2139   2199     ahrens 	/* new name better not be in use */
   2140   6689     maybee 	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
   2141   6689     maybee 	dsl_dataset_rele(hds, FTAG);
   2142    789     ahrens 
   2143   2199     ahrens 	if (err == 0)
   2144   2199     ahrens 		err = EEXIST;
   2145   2199     ahrens 	else if (err == ENOENT)
   2146   2199     ahrens 		err = 0;
   2147   4007   mmusante 
   2148   4007   mmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
   2149   4007   mmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
   2150   4007   mmusante 		err = ENAMETOOLONG;
   2151   4007   mmusante 
   2152   2199     ahrens 	return (err);
   2153   2199     ahrens }
   2154    789     ahrens 
   2155   2199     ahrens static void
   2156   4543      marks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
   2157   4543      marks     cred_t *cr, dmu_tx_t *tx)
   2158   2199     ahrens {
   2159   2199     ahrens 	dsl_dataset_t *ds = arg1;
   2160   4543      marks 	const char *newsnapname = arg2;
   2161   2199     ahrens 	dsl_dir_t *dd = ds->ds_dir;
   2162   2199     ahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
   2163   2199     ahrens 	dsl_dataset_t *hds;
   2164   2199     ahrens 	int err;
   2165    789     ahrens 
   2166   2199     ahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
   2167    789     ahrens 
   2168   6689     maybee 	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
   2169   6689     maybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
   2170    789     ahrens 
   2171   2199     ahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
   2172   6689     maybee 	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
   2173   2199     ahrens 	ASSERT3U(err, ==, 0);
   2174   2199     ahrens 	mutex_enter(&ds->ds_lock);
   2175   2199     ahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
   2176   2199     ahrens 	mutex_exit(&ds->ds_lock);
   2177   2199     ahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
   2178   2199     ahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
   2179   2199     ahrens 	ASSERT3U(err, ==, 0);
   2180    789     ahrens 
   2181   4543      marks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
   2182   4543      marks 	    cr, "dataset = %llu", ds->ds_object);
   2183   6689     maybee 	dsl_dataset_rele(hds, FTAG);
   2184    789     ahrens }
   2185    789     ahrens 
   2186   5326   ek110237 struct renamesnaparg {
   2187   4007   mmusante 	dsl_sync_task_group_t *dstg;
   2188   4007   mmusante 	char failed[MAXPATHLEN];
   2189   4007   mmusante 	char *oldsnap;
   2190   4007   mmusante 	char *newsnap;
   2191   4007   mmusante };
   2192   4007   mmusante 
   2193   4007   mmusante static int
   2194   4007   mmusante dsl_snapshot_rename_one(char *name, void *arg)
   2195   4007   mmusante {
   2196   5326   ek110237 	struct renamesnaparg *ra = arg;
   2197   4007   mmusante 	dsl_dataset_t *ds = NULL;
   2198   4007   mmusante 	char *cp;
   2199   4007   mmusante 	int err;
   2200   4007   mmusante 
   2201   4007   mmusante 	cp = name + strlen(name);
   2202   4007   mmusante 	*cp = '@';
   2203   4007   mmusante 	(void) strcpy(cp + 1, ra->oldsnap);
   2204   4543      marks 
   2205   4543      marks 	/*
   2206   4543      marks 	 * For recursive snapshot renames the parent won't be changing
   2207   4543      marks 	 * so we just pass name for both the to/from argument.
   2208   4543      marks 	 */
   2209   7312    Matthew 	err = zfs_secpolicy_rename_perms(name, name, CRED());
   2210   7312    Matthew 	if (err == ENOENT) {
   2211   7312    Matthew 		return (0);
   2212   7312    Matthew 	} else if (err) {
   2213   4543      marks 		(void) strcpy(ra->failed, name);
   2214   4543      marks 		return (err);
   2215   4543      marks 	}
   2216   4543      marks 
   2217   6689     maybee #ifdef _KERNEL
   2218   6689     maybee 	/*
   2219   6689     maybee 	 * For all filesystems undergoing rename, we'll need to unmount it.
   2220   6689     maybee 	 */
   2221   6689     maybee 	(void) zfs_unmount_snap(name, NULL);
   2222   6689     maybee #endif
   2223   6689     maybee 	err = dsl_dataset_hold(name, ra->dstg, &ds);
   2224   6689     maybee 	*cp = '\0';
   2225   4007   mmusante 	if (err == ENOENT) {
   2226   4007   mmusante 		return (0);
   2227   6689     maybee 	} else if (err) {
   2228   4007   mmusante 		(void) strcpy(ra->failed, name);
   2229   4007   mmusante 		return (err);
   2230   4007   mmusante 	}
   2231   4007   mmusante 
   2232   4007   mmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
   2233   4007   mmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
   2234   4007   mmusante 
   2235   4007   mmusante 	return (0);
   2236   4007   mmusante }
   2237   4007   mmusante 
   2238   4007   mmusante static int
   2239   4007   mmusante dsl_recursive_rename(char *oldname, const char *newname)
   2240   4007   mmusante {
   2241   4007   mmusante 	int err;
   2242   5326   ek110237 	struct renamesnaparg *ra;
   2243   4007   mmusante 	dsl_sync_task_t *dst;
   2244   4007   mmusante 	spa_t *spa;
   2245   4007   mmusante 	char *cp, *fsname = spa_strdup(oldname);
   2246   4007   mmusante 	int len = strlen(oldname);
   2247   4007   mmusante 
   2248   4007   mmusante 	/* truncate the snapshot name to get the fsname */
   2249   4007   mmusante 	cp = strchr(fsname, '@');
   2250   4007   mmusante 	*cp = '\0';
   2251   4007   mmusante 
   2252   4603     ahrens 	err = spa_open(fsname, &spa, FTAG);
   2253   4007   mmusante 	if (err) {
   2254   4007   mmusante 		kmem_free(fsname, len + 1);
   2255   4007   mmusante 		return (err);
   2256   4007   mmusante 	}
   2257   5326   ek110237 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
   2258   4007   mmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
   2259   4007   mmusante 
   2260   4007   mmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
   2261   4007   mmusante 	ra->newsnap = strchr(newname, '@') + 1;
   2262   4007   mmusante 	*ra->failed = '\0';
   2263   4007   mmusante 
   2264   4007   mmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
   2265   4007   mmusante 	    DS_FIND_CHILDREN);
   2266   4007   mmusante 	kmem_free(fsname, len + 1);
   2267   4007   mmusante 
   2268   4007   mmusante 	if (err == 0) {
   2269   4007   mmusante 		err = dsl_sync_task_group_wait(ra->dstg);
   2270   4007   mmusante 	}
   2271   4007   mmusante 
   2272   4007   mmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
   2273   4007   mmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
   2274   4007   mmusante 		dsl_dataset_t *ds = dst->dst_arg1;
   2275   4007   mmusante 		if (dst->dst_err) {
   2276   4007   mmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
   2277   4009   mmusante 			(void) strcat(ra->failed, "@");
   2278   4009   mmusante 			(void) strcat(ra->failed, ra->newsnap);
   2279   4007   mmusante 		}
   2280   6689     maybee 		dsl_dataset_rele(ds, ra->dstg);
   2281   4007   mmusante 	}
   2282   4007   mmusante 
   2283   4543      marks 	if (err)
   2284   4543      marks 		(void) strcpy(oldname, ra->failed);
   2285   4007   mmusante 
   2286   4007   mmusante 	dsl_sync_task_group_destroy(ra->dstg);
   2287   5326   ek110237 	kmem_free(ra, sizeof (struct renamesnaparg));
   2288   4007   mmusante 	spa_close(spa, FTAG);
   2289   4007   mmusante 	return (err);
   2290   4007   mmusante }
   2291   4007   mmusante 
   2292   4569   mmusante static int
   2293   4569   mmusante dsl_valid_rename(char *oldname, void *arg)
   2294   4569   mmusante {
   2295   4569   mmusante 	int delta = *(int *)arg;
   2296   4569   mmusante 
   2297   4569   mmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
   2298   4569   mmusante 		return (ENAMETOOLONG);
   2299   4569   mmusante 
   2300   4569   mmusante 	return (0);
   2301   4569   mmusante }
   2302   4569   mmusante 
   2303    789     ahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
   2304    789     ahrens int
   2305   6689     maybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
   2306    789     ahrens {
   2307    789     ahrens 	dsl_dir_t *dd;
   2308   2199     ahrens 	dsl_dataset_t *ds;
   2309    789     ahrens 	const char *tail;
   2310    789     ahrens 	int err;
   2311    789     ahrens 
   2312   2199     ahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
   2313   1544   eschrock 	if (err)
   2314   1544   eschrock 		return (err);
   2315   8517       Eric 	/*
   2316   8517       Eric 	 * If there are more than 2 references there may be holds
   2317   8517       Eric 	 * hanging around that haven't been cleared out yet.
   2318   8517       Eric 	 */
   2319   8517       Eric 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
   2320   8517       Eric 		txg_wait_synced(dd->dd_pool, 0);
   2321    789     ahrens 	if (tail == NULL) {
   2322   4569   mmusante 		int delta = strlen(newname) - strlen(oldname);
   2323   4569   mmusante 
   2324   7046     ahrens 		/* if we're growing, validate child name lengths */
   2325   4569   mmusante 		if (delta > 0)
   2326   4569   mmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
   2327   4569   mmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
   2328   4569   mmusante 
   2329   4569   mmusante 		if (!err)
   2330   4569   mmusante 			err = dsl_dir_rename(dd, newname);
   2331    789     ahrens 		dsl_dir_close(dd, FTAG);
   2332    789     ahrens 		return (err);
   2333    789     ahrens 	}
   2334    789     ahrens 	if (tail[0] != '@') {
   2335  10588       Eric 		/* the name ended in a nonexistent component */
   2336    789     ahrens 		dsl_dir_close(dd, FTAG);
   2337    789     ahrens 		return (ENOENT);
   2338    789     ahrens 	}
   2339    789     ahrens 
   2340   2199     ahrens 	dsl_dir_close(dd, FTAG);
   2341    789     ahrens 
   2342   2199     ahrens 	/* new name must be snapshot in same filesystem */
   2343   2199     ahrens 	tail = strchr(newname, '@');
   2344   2199     ahrens 	if (tail == NULL)
   2345   2199     ahrens 		return (EINVAL);
   2346   2199     ahrens 	tail++;
   2347   2199     ahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
   2348   2199     ahrens 		return (EXDEV);
   2349   2199     ahrens 
   2350   4007   mmusante 	if (recursive) {
   2351   4007   mmusante 		err = dsl_recursive_rename(oldname, newname);
   2352   4007   mmusante 	} else {
   2353   6689     maybee 		err = dsl_dataset_hold(oldname, FTAG, &ds);
   2354   4007   mmusante 		if (err)
   2355   4007   mmusante 			return (err);
   2356   2199     ahrens 
   2357   4007   mmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
   2358   4007   mmusante 		    dsl_dataset_snapshot_rename_check,
   2359   4007   mmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
   2360   2199     ahrens 
   2361   6689     maybee 		dsl_dataset_rele(ds, FTAG);
   2362   4007   mmusante 	}
   2363   2199     ahrens 
   2364    789     ahrens 	return (err);
   2365    789     ahrens }
   2366   2082   eschrock 
   2367   7046     ahrens struct promotenode {
   2368   6689     maybee 	list_node_t link;
   2369   6689     maybee 	dsl_dataset_t *ds;
   2370   6689     maybee };
   2371   6689     maybee 
   2372   2199     ahrens struct promotearg {
   2373   7390    Matthew 	list_t shared_snaps, origin_snaps, clone_snaps;
   2374   7390    Matthew 	dsl_dataset_t *origin_origin, *origin_head;
   2375   7390    Matthew 	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
   2376  10588       Eric 	char *err_ds;
   2377   2199     ahrens };
   2378   7390    Matthew 
   2379   7390    Matthew static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
   2380   2199     ahrens 
   2381   4543      marks /* ARGSUSED */
   2382   2082   eschrock static int
   2383   2199     ahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
   2384   2082   eschrock {
   2385   2199     ahrens 	dsl_dataset_t *hds = arg1;
   2386   2199     ahrens 	struct promotearg *pa = arg2;
   2387   7390    Matthew 	struct promotenode *snap = list_head(&pa->shared_snaps);
   2388   6689     maybee 	dsl_dataset_t *origin_ds = snap->ds;
   2389   6689     maybee 	int err;
   2390   2082   eschrock 
   2391   7046     ahrens 	/* Check that it is a real clone */
   2392   7046     ahrens 	if (!dsl_dir_is_clone(hds->ds_dir))
   2393   2082   eschrock 		return (EINVAL);
   2394   2082   eschrock 
   2395   2199     ahrens 	/* Since this is so expensive, don't do the preliminary check */
   2396   2199     ahrens 	if (!dmu_tx_is_syncing(tx))
   2397   2199     ahrens 		return (0);
   2398   2199     ahrens 
   2399   6689     maybee 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
   2400   6689     maybee 		return (EXDEV);
   2401   2082   eschrock 
   2402   5367     ahrens 	/* compute origin's new unique space */
   2403   7390    Matthew 	snap = list_tail(&pa->clone_snaps);
   2404   7390    Matthew 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
   2405   7390    Matthew 	err = bplist_space_birthrange(&snap->ds->ds_deadlist,
   2406   7390    Matthew 	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
   2407   7390    Matthew 	if (err)
   2408   6689     maybee 		return (err);
   2409   6689     maybee 
   2410   6689     maybee 	/*
   2411   6689     maybee 	 * Walk the snapshots that we are moving
   2412   6689     maybee 	 *
   2413   7390    Matthew 	 * Compute space to transfer.  Consider the incremental changes
   2414   7390    Matthew 	 * to used for each snapshot:
   2415   7390    Matthew 	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
   2416   7390    Matthew 	 * So each snapshot gave birth to:
   2417   7390    Matthew 	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
   2418   6689     maybee 	 * So a sequence would look like:
   2419   7390    Matthew 	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
   2420   6689     maybee 	 * Which simplifies to:
   2421   7390    Matthew 	 * uN + kN + kN-1 + ... + k1 + k0
   2422   6689     maybee 	 * Note however, if we stop before we reach the ORIGIN we get:
   2423   7390    Matthew 	 * uN + kN + kN-1 + ... + kM - uM-1
   2424   6689     maybee 	 */
   2425   6689     maybee 	pa->used = origin_ds->ds_phys->ds_used_bytes;
   2426   6689     maybee 	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
   2427   6689     maybee 	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
   2428   7390    Matthew 	for (snap = list_head(&pa->shared_snaps); snap;
   2429   7390    Matthew 	    snap = list_next(&pa->shared_snaps, snap)) {
   2430   2082   eschrock 		uint64_t val, dlused, dlcomp, dluncomp;
   2431   6689     maybee 		dsl_dataset_t *ds = snap->ds;
   2432   2082   eschrock 
   2433   2082   eschrock 		/* Check that the snapshot name does not conflict */
   2434   7390    Matthew 		VERIFY(0 == dsl_dataset_get_snapname(ds));
   2435   6689     maybee 		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
   2436  10588       Eric 		if (err == 0) {
   2437  10588       Eric 			err = EEXIST;
   2438  10588       Eric 			goto out;
   2439  10588       Eric 		}
   2440   6689     maybee 		if (err != ENOENT)
   2441  10588       Eric 			goto out;
   2442   6689     maybee 
   2443   6689     maybee 		/* The very first snapshot does not have a deadlist */
   2444   7390    Matthew 		if (ds->ds_phys->ds_prev_snap_obj == 0)
   2445   7390    Matthew 			continue;
   2446   7390    Matthew 
   2447   7390    Matthew 		if (err = bplist_space(&ds->ds_deadlist,
   2448   7390    Matthew 		    &dlused, &dlcomp, &dluncomp))
   2449  10588       Eric 			goto out;
   2450   7390    Matthew 		pa->used += dlused;
   2451   7390    Matthew 		pa->comp += dlcomp;
   2452   7390    Matthew 		pa->uncomp += dluncomp;
   2453   7390    Matthew 	}
   2454   2082   eschrock 
   2455   6689     maybee 	/*
   2456   6689     maybee 	 * If we are a clone of a clone then we never reached ORIGIN,
   2457   6689     maybee 	 * so we need to subtract out the clone origin's used space.
   2458   6689     maybee 	 */
   2459   7390    Matthew 	if (pa->origin_origin) {
   2460   7390    Matthew 		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
   2461   7390    Matthew 		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
   2462   7390    Matthew 		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
   2463   2082   eschrock 	}
   2464   2082   eschrock 
   2465   7390    Matthew 	/* Check that there is enough space here */
   2466   7390    Matthew 	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
   2467   7390    Matthew 	    pa->used);
   2468   7390    Matthew 	if (err)
   2469   7390    Matthew 		return (err);
   2470   6689     maybee 
   2471   7390    Matthew 	/*
   2472   7390    Matthew 	 * Compute the amounts of space that will be used by snapshots
   2473   7390    Matthew 	 * after the promotion (for both origin and clone).  For each,
   2474   7390    Matthew 	 * it is the amount of space that will be on all of their
   2475   7390    Matthew 	 * deadlists (that was not born before their new origin).
   2476   7390    Matthew 	 */
   2477   7390    Matthew 	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
   2478   7390    Matthew 		uint64_t space;
   2479   7390    Matthew 
   2480   7390    Matthew 		/*
   2481   7390    Matthew 		 * Note, typically this will not be a clone of a clone,
   2482   7390    Matthew 		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
   2483   7390    Matthew 		 * these snaplist_space() -> bplist_space_birthrange()
   2484   7390    Matthew 		 * calls will be fast because they do not have to
   2485   7390    Matthew 		 * iterate over all bps.
   2486   7390    Matthew 		 */
   2487   7390    Matthew 		snap = list_head(&pa->origin_snaps);
   2488   7390    Matthew 		err = snaplist_space(&pa->shared_snaps,
   2489   7390    Matthew 		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
   2490   7390    Matthew 		if (err)
   2491   7390    Matthew 			return (err);
   2492   7390    Matthew 
   2493   7390    Matthew 		err = snaplist_space(&pa->clone_snaps,
   2494   7390    Matthew 		    snap->ds->ds_origin_txg, &space);
   2495   7390    Matthew 		if (err)
   2496   7390    Matthew 			return (err);
   2497   7390    Matthew 		pa->cloneusedsnap += space;
   2498   7390    Matthew 	}
   2499   7390    Matthew 	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
   2500   7390    Matthew 		err = snaplist_space(&pa->origin_snaps,
   2501   7390    Matthew 		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
   2502   7390    Matthew 		if (err)
   2503   7390    Matthew 			return (err);
   2504   6689     maybee 	}
   2505   2082   eschrock 
   2506   7390    Matthew 	return (0);
   2507  10588       Eric out:
   2508  10588       Eric 	pa->err_ds =  snap->ds->ds_snapname;
   2509  10588       Eric 	return (err);
   2510   2199     ahrens }
   2511   2199     ahrens 
   2512   2199     ahrens static void
   2513   4543      marks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
   2514   2199     ahrens {
   2515   2199     ahrens 	dsl_dataset_t *hds = arg1;
   2516   2199     ahrens 	struct promotearg *pa = arg2;
   2517   7390    Matthew 	struct promotenode *snap = list_head(&pa->shared_snaps);
   2518   6689     maybee 	dsl_dataset_t *origin_ds = snap->ds;
   2519   7390    Matthew 	dsl_dataset_t *origin_head;
   2520   2199     ahrens 	dsl_dir_t *dd = hds->ds_dir;
   2521   2199     ahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
   2522   5367     ahrens 	dsl_dir_t *odd = NULL;
   2523   7046     ahrens 	uint64_t oldnext_obj;
   2524   7390    Matthew 	int64_t delta;
   2525   2199     ahrens 
   2526   2199     ahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
   2527   7390    Matthew 
   2528   7390    Matthew 	snap = list_head(&pa->origin_snaps);
   2529   7390    Matthew 	origin_head = snap->ds;
   2530   2199     ahrens 
   2531   2417     ahrens 	/*
   2532   5367     ahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
   2533   2417     ahrens 	 * changing.
   2534   2417     ahrens 	 */
   2535   5367     ahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
   2536   5367     ahrens 	    NULL, FTAG, &odd));
   2537   2082   eschrock 
   2538   6689     maybee 	/* change origin's next snap */
   2539   6689     maybee 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
   2540   7046     ahrens 	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
   2541   7390    Matthew 	snap = list_tail(&pa->clone_snaps);
   2542   7390    Matthew 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
   2543   7390    Matthew 	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
   2544   7046     ahrens 
   2545   7046     ahrens 	/* change the origin's next clone */
   2546   7046     ahrens 	if (origin_ds->ds_phys->ds_next_clones_obj) {
   2547  10801    Matthew 		remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
   2548   7046     ahrens 		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
   2549   7046     ahrens 		    origin_ds->ds_phys->ds_next_clones_obj,
   2550   7046     ahrens 		    oldnext_obj, tx));
   2551   7046     ahrens 	}
   2552   6689     maybee 
   2553   6689     maybee 	/* change origin */
   2554   6689     maybee 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
   2555   6689     maybee 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
   2556   6689     maybee 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
   2557   7390    Matthew 	hds->ds_origin_txg = origin_head->ds_origin_txg;
   2558   6689     maybee 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
   2559   6689     maybee 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
   2560   7390    Matthew 	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;
   2561   6689     maybee 
   2562   2082   eschrock 	/* move snapshots to this dir */
   2563   7390    Matthew 	for (snap = list_head(&pa->shared_snaps); snap;
   2564   7390    Matthew 	    snap = list_next(&pa->shared_snaps, snap)) {
   2565   6689     maybee 		dsl_dataset_t *ds = snap->ds;
   2566   2082   eschrock 
   2567   7237   ek110237 		/* unregister props as dsl_dir is changing */
   2568  10298    Matthew 		if (ds->ds_objset) {
   2569  10298    Matthew 			dmu_objset_evict(ds->ds_objset);
   2570  10298    Matthew 			ds->ds_objset = NULL;
   2571   7237   ek110237 		}
   2572   2082   eschrock 		/* move snap name entry */
   2573   7390    Matthew 		VERIFY(0 == dsl_dataset_get_snapname(ds));
   2574   7390    Matthew 		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
   2575   6689     maybee 		    ds->ds_snapname, tx));
   2576   2199     ahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
   2577   2082   eschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
   2578   2082   eschrock 		    8, 1, &ds->ds_object, tx));
   2579   2082   eschrock 		/* change containing dsl_dir */
   2580   2082   eschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
   2581   5367     ahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
   2582   2082   eschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
   2583   5367     ahrens 		ASSERT3P(ds->ds_dir, ==, odd);
   2584   2082   eschrock 		dsl_dir_close(ds->ds_dir, ds);
   2585   2199     ahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
   2586   2082   eschrock 		    NULL, ds, &ds->ds_dir));
   2587   2082   eschrock 
   2588   2082   eschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
   2589   7390    Matthew 	}
   2590   2082   eschrock 
   2591   7390    Matthew 	/*
   2592   7390    Matthew 	 * Change space accounting.
   2593   7390    Matthew 	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
   2594   7390    Matthew 	 * both be valid, or both be 0 (resulting in delta == 0).  This
   2595   7390    Matthew 	 * is true for each of {clone,origin} independently.
   2596   7390    Matthew 	 */
   2597   7390    Matthew 
   2598   7390    Matthew 	delta = pa->cloneusedsnap -
   2599   7390    Matthew 	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
   2600   7390    Matthew 	ASSERT3S(delta, >=, 0);
   2601   7390    Matthew 	ASSERT3U(pa->used, >=, delta);
   2602   7390    Matthew 	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
   2603   7390    Matthew 	dsl_dir_diduse_space(dd, DD_USED_HEAD,
   2604   7390    Matthew 	    pa->used - delta, pa->comp, pa->uncomp, tx);
   2605   7390    Matthew 
   2606   7390    Matthew 	delta = pa->originusedsnap -
   2607   7390    Matthew 	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
   2608   7390    Matthew 	ASSERT3S(delta, <=, 0);
   2609   7390    Matthew 	ASSERT3U(pa->used, >=, -delta);
   2610   7390    Matthew 	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
   2611   7390    Matthew 	dsl_dir_diduse_space(odd, DD_USED_HEAD,
   2612   7390    Matthew 	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);
   2613   7390    Matthew 
   2614   5367     ahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
   2615   2082   eschrock 
   2616   4543      marks 	/* log history record */
   2617   4543      marks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
   2618   6689     maybee 	    cr, "dataset = %llu", hds->ds_object);
   2619   4543      marks 
   2620   5367     ahrens 	dsl_dir_close(odd, FTAG);
   2621   2082   eschrock }
   2622   2082   eschrock 
   2623   7390    Matthew static char *snaplist_tag = "snaplist";
   2624   7390    Matthew /*
   2625   7390    Matthew  * Make a list of dsl_dataset_t's for the snapshots between first_obj
   2626   7390    Matthew  * (exclusive) and last_obj (inclusive).  The list will be in reverse
   2627   7390    Matthew  * order (last_obj will be the list_head()).  If first_obj == 0, do all
   2628   7390    Matthew  * snapshots back to this dataset's origin.
   2629   7390    Matthew  */
   2630   7390    Matthew static int
   2631   7390    Matthew snaplist_make(dsl_pool_t *dp, boolean_t own,
   2632   7390    Matthew     uint64_t first_obj, uint64_t last_obj, list_t *l)
   2633   7390    Matthew {
   2634   7390    Matthew 	uint64_t obj = last_obj;
   2635   7390    Matthew 
   2636   7390    Matthew 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
   2637   7390    Matthew 
   2638   7390    Matthew 	list_create(l, sizeof (struct promotenode),
   2639   7390    Matthew 	    offsetof(struct promotenode, link));
   2640   7390    Matthew 
   2641   7390    Matthew 	while (obj != first_obj) {
   2642   7390    Matthew 		dsl_dataset_t *ds;
   2643   7390    Matthew 		struct promotenode *snap;
   2644   7390    Matthew 		int err;
   2645   7390    Matthew 
   2646   7390    Matthew 		if (own) {
   2647   7390    Matthew 			err = dsl_dataset_own_obj(dp, obj,
   2648   7390    Matthew 			    0, snaplist_tag, &ds);
   2649   7390    Matthew 			if (err == 0)
   2650   7390    Matthew 				dsl_dataset_make_exclusive(ds, snaplist_tag);
   2651   7390    Matthew 		} else {
   2652   7390    Matthew 			err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
   2653   7390    Matthew 		}
   2654   7390    Matthew 		if (err == ENOENT) {
   2655   7390    Matthew 			/* lost race with snapshot destroy */
   2656   7390    Matthew 			struct promotenode *last = list_tail(l);
   2657   7390    Matthew 			ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
   2658   7390    Matthew 			obj = last->ds->ds_phys->ds_prev_snap_obj;
   2659   7390    Matthew 			continue;
   2660   7390    Matthew 		} else if (err) {
   2661   7390    Matthew 			return (err);
   2662   7390    Matthew 		}
   2663   7390    Matthew 
   2664   7390    Matthew 		if (first_obj == 0)
   2665   7390    Matthew 			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
   2666   7390    Matthew 
   2667   7390    Matthew 		snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
   2668   7390    Matthew 		snap->ds = ds;
   2669   7390    Matthew 		list_insert_tail(l, snap);
   2670   7390    Matthew 		obj = ds->ds_phys->ds_prev_snap_obj;
   2671   7390    Matthew 	}
   2672   7390    Matthew 
   2673   7390    Matthew 	return (0);
   2674   7390    Matthew }
   2675   7390    Matthew 
   2676   7390    Matthew static int
   2677   7390    Matthew snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
   2678   7390    Matthew {
   2679   7390    Matthew 	struct promotenode *snap;
   2680   7390    Matthew 
   2681   7390    Matthew 	*spacep = 0;
   2682   7390    Matthew 	for (snap = list_head(l); snap; snap = list_next(l, snap)) {
   2683   7390    Matthew 		uint64_t used;
   2684   7390    Matthew 		int err = bplist_space_birthrange(&snap->ds->ds_deadlist,
   2685   7390    Matthew 		    mintxg, UINT64_MAX, &used);
   2686   7390    Matthew 		if (err)
   2687   7390    Matthew 			return (err);
   2688   7390    Matthew 		*spacep += used;
   2689   7390    Matthew 	}
   2690   7390    Matthew 	return (0);
   2691   7390    Matthew }
   2692   7390    Matthew 
   2693   7390    Matthew static void
   2694   7390    Matthew snaplist_destroy(list_t *l, boolean_t own)
   2695   7390    Matthew {
   2696   7390    Matthew 	struct promotenode *snap;
   2697   7390    Matthew 
   2698   8779       Mark 	if (!l || !list_link_active(&l->list_head))
   2699   7390    Matthew 		return;
   2700   7390    Matthew 
   2701   7390    Matthew 	while ((snap = list_tail(l)) != NULL) {
   2702   7390    Matthew 		list_remove(l, snap);
   2703   7390    Matthew 		if (own)
   2704   7390    Matthew 			dsl_dataset_disown(snap->ds, snaplist_tag);
   2705   7390    Matthew 		else
   2706   7390    Matthew 			dsl_dataset_rele(snap->ds, snaplist_tag);
   2707   7390    Matthew 		kmem_free(snap, sizeof (struct promotenode));
   2708   7390    Matthew 	}
   2709   7390    Matthew 	list_destroy(l);
   2710   7390    Matthew }
   2711   7390    Matthew 
   2712   7390    Matthew /*
   2713   7390    Matthew  * Promote a clone.  Nomenclature note:
   2714   7390    Matthew  * "clone" or "cds": the original clone which is being promoted
   2715   7390    Matthew  * "origin" or "ods": the snapshot which is originally clone's origin
   2716   7390    Matthew  * "origin head" or "ohds": the dataset which is the head
   2717   7390    Matthew  * (filesystem/volume) for the origin
   2718   7390    Matthew  * "origin origin": the origin of the origin's filesystem (typically
   2719   7390    Matthew  * NULL, indicating that the clone is not a clone of a clone).
   2720   7390    Matthew  */
   2721   2082   eschrock int
   2722  10588       Eric dsl_dataset_promote(const char *name, char *conflsnap)
   2723   2082   eschrock {
   2724   2082   eschrock 	dsl_dataset_t *ds;
   2725   6689     maybee 	dsl_dir_t *dd;
   2726   6689     maybee 	dsl_pool_t *dp;
   2727   2082   eschrock 	dmu_object_info_t doi;
   2728   7390    Matthew 	struct promotearg pa = { 0 };
   2729   7046     ahrens 	struct promotenode *snap;
   2730   6689     maybee 	int err;
   2731   2082   eschrock 
   2732   6689     maybee 	err = dsl_dataset_hold(name, FTAG, &ds);
   2733   2082   eschrock 	if (err)
   2734   2082   eschrock 		return (err);
   2735   6689     maybee 	dd = ds->ds_dir;
   2736   6689     maybee 	dp = dd->dd_pool;
   2737   2082   eschrock 
   2738   6689     maybee 	err = dmu_object_info(dp->dp_meta_objset,
   2739   2082   eschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
   2740   2082   eschrock 	if (err) {
   2741   6689     maybee 		dsl_dataset_rele(ds, FTAG);
   2742   2082   eschrock 		return (err);
   2743   2082   eschrock 	}
   2744   6689     maybee 
   2745   7390    Matthew 	if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
   2746   7390    Matthew 		dsl_dataset_rele(ds, FTAG);
   2747   7390    Matthew 		return (EINVAL);
   2748   7390    Matthew 	}
   2749   7390    Matthew 
   2750   6689     maybee 	/*
   2751   6689     maybee 	 * We are going to inherit all the snapshots taken before our
   2752   6689     maybee 	 * origin (i.e., our new origin will be our parent's origin).
   2753   6689     maybee 	 * Take ownership of them so that we can rename them into our
   2754   6689     maybee 	 * namespace.
   2755   6689     maybee 	 */
   2756   6689     maybee 	rw_enter(&dp->dp_config_rwlock, RW_READER);
   2757   7046     ahrens 
   2758   7390    Matthew 	err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
   2759   7390    Matthew 	    &pa.shared_snaps);
   2760   7390    Matthew 	if (err != 0)
   2761   7390    Matthew 		goto out;
   2762   7046     ahrens 
   2763   7390    Matthew 	err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
   2764   7390    Matthew 	if (err != 0)
   2765   7390    Matthew 		goto out;
   2766   7390    Matthew 
   2767   7390    Matthew 	snap = list_head(&pa.shared_snaps);
   2768   7390    Matthew 	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
   2769   7390    Matthew 	err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
   2770   7390    Matthew 	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
   2771   7390    Matthew 	if (err != 0)
   2772   7390    Matthew 		goto out;
   2773   7390    Matthew 
   2774   7390    Matthew 	if (dsl_dir_is_clone(snap->ds->ds_dir)) {
   2775   7390    Matthew 		err = dsl_dataset_own_obj(dp,
   2776   7390    Matthew 		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
   2777   7390    Matthew 		    0, FTAG, &pa.origin_origin);
   2778   7390    Matthew 		if (err != 0)
   2779   6689     maybee 			goto out;
   2780   7390    Matthew 	}
   2781   7046     ahrens 
   2782   7390    Matthew out:
   2783   6689     maybee 	rw_exit(&dp->dp_config_rwlock);
   2784   2082   eschrock 
   2785   2082   eschrock 	/*
   2786   2082   eschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
   2787   2082   eschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
   2788   2082   eschrock 	 * bonus buffers.
   2789   2082   eschrock 	 */
   2790   7390    Matthew 	if (err == 0) {
   2791   7390    Matthew 		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
   2792   7390    Matthew 		    dsl_dataset_promote_sync, ds, &pa,
   2793  10922       Jeff 		    2 + 2 * doi.doi_physical_blocks_512);
   2794  10588       Eric 		if (err && pa.err_ds && conflsnap)
   2795