1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 8517 Eric * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 
24 789 ahrens */ 25 789 ahrens 26 789 ahrens #include <sys/dmu_objset.h> 27 789 ahrens #include <sys/dsl_dataset.h> 28 789 ahrens #include <sys/dsl_dir.h> 29 2082 eschrock #include <sys/dsl_prop.h> 30 2199 ahrens #include <sys/dsl_synctask.h> 31 789 ahrens #include <sys/dmu_traverse.h> 32 789 ahrens #include <sys/dmu_tx.h> 33 789 ahrens #include <sys/arc.h> 34 789 ahrens #include <sys/zio.h> 35 789 ahrens #include <sys/zap.h> 36 789 ahrens #include <sys/unique.h> 37 789 ahrens #include <sys/zfs_context.h> 38 4007 mmusante #include <sys/zfs_ioctl.h> 39 4543 marks #include <sys/spa.h> 40 7046 ahrens #include <sys/zfs_znode.h> 41 10242 chris #include <sys/zvol.h> 42 1731 bonwick 43 6689 maybee static char *dsl_reaper = "the grim reaper"; 44 6689 maybee 45 2199 ahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 46 2199 ahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 47 5378 ck153898 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 48 789 ahrens 49 3444 ek110237 #define DS_REF_MAX (1ULL << 62) 50 789 ahrens 51 789 ahrens #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 52 789 ahrens 53 6689 maybee #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 54 6689 maybee 55 789 ahrens 56 5378 ck153898 /* 57 5378 ck153898 * Figure out how much of this delta should be propogated to the dsl_dir 58 5378 ck153898 * layer. If there's a refreservation, that space has already been 59 5378 ck153898 * partially accounted for in our ancestors. 
60 5378 ck153898 */ 61 5378 ck153898 static int64_t 62 5378 ck153898 parent_delta(dsl_dataset_t *ds, int64_t delta) 63 5378 ck153898 { 64 5378 ck153898 uint64_t old_bytes, new_bytes; 65 5378 ck153898 66 5378 ck153898 if (ds->ds_reserved == 0) 67 5378 ck153898 return (delta); 68 5378 ck153898 69 5378 ck153898 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 70 5378 ck153898 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 71 5378 ck153898 72 5378 ck153898 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 73 5378 ck153898 return (new_bytes - old_bytes); 74 5378 ck153898 } 75 789 ahrens 76 789 ahrens void 77 10922 Jeff dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 78 789 ahrens { 79 10922 Jeff int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 80 789 ahrens int compressed = BP_GET_PSIZE(bp); 81 789 ahrens int uncompressed = BP_GET_UCSIZE(bp); 82 5378 ck153898 int64_t delta; 83 789 ahrens 84 789 ahrens dprintf_bp(bp, "born, ds=%p\n", ds); 85 789 ahrens 86 789 ahrens ASSERT(dmu_tx_is_syncing(tx)); 87 789 ahrens /* It could have been compressed away to nothing */ 88 789 ahrens if (BP_IS_HOLE(bp)) 89 789 ahrens return; 90 789 ahrens ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 91 789 ahrens ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 92 789 ahrens if (ds == NULL) { 93 789 ahrens /* 94 789 ahrens * Account for the meta-objset space in its placeholder 95 789 ahrens * dsl_dir. 
96 789 ahrens */ 97 789 ahrens ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 98 7390 Matthew dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 99 789 ahrens used, compressed, uncompressed, tx); 100 789 ahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 101 789 ahrens return; 102 789 ahrens } 103 789 ahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 104 7595 Matthew mutex_enter(&ds->ds_dir->dd_lock); 105 789 ahrens mutex_enter(&ds->ds_lock); 106 5378 ck153898 delta = parent_delta(ds, used); 107 789 ahrens ds->ds_phys->ds_used_bytes += used; 108 789 ahrens ds->ds_phys->ds_compressed_bytes += compressed; 109 789 ahrens ds->ds_phys->ds_uncompressed_bytes += uncompressed; 110 789 ahrens ds->ds_phys->ds_unique_bytes += used; 111 789 ahrens mutex_exit(&ds->ds_lock); 112 7390 Matthew dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 113 7390 Matthew compressed, uncompressed, tx); 114 7390 Matthew dsl_dir_transfer_space(ds->ds_dir, used - delta, 115 7390 Matthew DD_USED_REFRSRV, DD_USED_HEAD, tx); 116 7595 Matthew mutex_exit(&ds->ds_dir->dd_lock); 117 789 ahrens } 118 789 ahrens 119 6992 maybee int 120 10922 Jeff dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 121 10922 Jeff boolean_t async) 122 789 ahrens { 123 10922 Jeff if (BP_IS_HOLE(bp)) 124 10922 Jeff return (0); 125 10922 Jeff 126 10922 Jeff ASSERT(dmu_tx_is_syncing(tx)); 127 10922 Jeff ASSERT(bp->blk_birth <= tx->tx_txg); 128 10922 Jeff 129 10922 Jeff int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 130 789 ahrens int compressed = BP_GET_PSIZE(bp); 131 789 ahrens int uncompressed = BP_GET_UCSIZE(bp); 132 789 ahrens 133 789 ahrens ASSERT(used > 0); 134 789 ahrens if (ds == NULL) { 135 789 ahrens /* 136 789 ahrens * Account for the meta-objset space in its placeholder 137 789 ahrens * dataset. 
138 789 ahrens */ 139 10922 Jeff dsl_free(tx->tx_pool, tx->tx_txg, bp); 140 789 ahrens 141 7390 Matthew dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 142 789 ahrens -used, -compressed, -uncompressed, tx); 143 789 ahrens dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 144 6992 maybee return (used); 145 789 ahrens } 146 789 ahrens ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 147 789 ahrens 148 7390 Matthew ASSERT(!dsl_dataset_is_snapshot(ds)); 149 789 ahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 150 789 ahrens 151 789 ahrens if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 152 5378 ck153898 int64_t delta; 153 3547 maybee 154 789 ahrens dprintf_bp(bp, "freeing: %s", ""); 155 10922 Jeff dsl_free(tx->tx_pool, tx->tx_txg, bp); 156 789 ahrens 157 7595 Matthew mutex_enter(&ds->ds_dir->dd_lock); 158 789 ahrens mutex_enter(&ds->ds_lock); 159 5378 ck153898 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 160 5378 ck153898 !DS_UNIQUE_IS_ACCURATE(ds)); 161 5378 ck153898 delta = parent_delta(ds, -used); 162 789 ahrens ds->ds_phys->ds_unique_bytes -= used; 163 789 ahrens mutex_exit(&ds->ds_lock); 164 7390 Matthew dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 165 5378 ck153898 delta, -compressed, -uncompressed, tx); 166 7390 Matthew dsl_dir_transfer_space(ds->ds_dir, -used - delta, 167 7390 Matthew DD_USED_REFRSRV, DD_USED_HEAD, tx); 168 7595 Matthew mutex_exit(&ds->ds_dir->dd_lock); 169 789 ahrens } else { 170 789 ahrens dprintf_bp(bp, "putting on dead list: %s", ""); 171 10922 Jeff if (async) { 172 10922 Jeff /* 173 10922 Jeff * We are here as part of zio's write done callback, 174 10922 Jeff * which means we're a zio interrupt thread. We can't 175 10922 Jeff * call bplist_enqueue() now because it may block 176 10922 Jeff * waiting for I/O. Instead, put bp on the deferred 177 10922 Jeff * queue and let dsl_pool_sync() finish the job. 
178 10922 Jeff */ 179 10922 Jeff bplist_enqueue_deferred(&ds->ds_deadlist, bp); 180 10922 Jeff } else { 181 10922 Jeff VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 182 10922 Jeff } 183 5712 ahrens ASSERT3U(ds->ds_prev->ds_object, ==, 184 5712 ahrens ds->ds_phys->ds_prev_snap_obj); 185 5712 ahrens ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 186 789 ahrens /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 187 5712 ahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == 188 5712 ahrens ds->ds_object && bp->blk_birth > 189 5712 ahrens ds->ds_prev->ds_phys->ds_prev_snap_txg) { 190 5712 ahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 191 5712 ahrens mutex_enter(&ds->ds_prev->ds_lock); 192 5712 ahrens ds->ds_prev->ds_phys->ds_unique_bytes += used; 193 5712 ahrens mutex_exit(&ds->ds_prev->ds_lock); 194 7390 Matthew } 195 7390 Matthew if (bp->blk_birth > ds->ds_origin_txg) { 196 7390 Matthew dsl_dir_transfer_space(ds->ds_dir, used, 197 7390 Matthew DD_USED_HEAD, DD_USED_SNAP, tx); 198 789 ahrens } 199 789 ahrens } 200 789 ahrens mutex_enter(&ds->ds_lock); 201 789 ahrens ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 202 789 ahrens ds->ds_phys->ds_used_bytes -= used; 203 789 ahrens ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 204 789 ahrens ds->ds_phys->ds_compressed_bytes -= compressed; 205 789 ahrens ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 206 789 ahrens ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 207 789 ahrens mutex_exit(&ds->ds_lock); 208 6992 maybee 209 6992 maybee return (used); 210 789 ahrens } 211 789 ahrens 212 1544 eschrock uint64_t 213 1544 eschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 214 789 ahrens { 215 2885 ahrens uint64_t trysnap = 0; 216 2885 ahrens 217 789 ahrens if (ds == NULL) 218 1544 eschrock return (0); 219 789 ahrens /* 220 789 ahrens * The snapshot creation could fail, but that would cause an 221 789 ahrens * incorrect FALSE return, which would only result in an 
222 789 ahrens * overestimation of the amount of space that an operation would 223 789 ahrens * consume, which is OK. 224 789 ahrens * 225 789 ahrens * There's also a small window where we could miss a pending 226 789 ahrens * snapshot, because we could set the sync task in the quiescing 227 789 ahrens * phase. So this should only be used as a guess. 228 789 ahrens */ 229 2885 ahrens if (ds->ds_trysnap_txg > 230 2885 ahrens spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 231 2885 ahrens trysnap = ds->ds_trysnap_txg; 232 2885 ahrens return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 233 1544 eschrock } 234 1544 eschrock 235 9653 Sanjeev boolean_t 236 1544 eschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 237 1544 eschrock { 238 1544 eschrock return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 239 789 ahrens } 240 789 ahrens 241 789 ahrens /* ARGSUSED */ 242 789 ahrens static void 243 789 ahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv) 244 789 ahrens { 245 789 ahrens dsl_dataset_t *ds = dsv; 246 789 ahrens 247 6689 maybee ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 248 789 ahrens 249 4787 ahrens unique_remove(ds->ds_fsid_guid); 250 789 ahrens 251 10298 Matthew if (ds->ds_objset != NULL) 252 10298 Matthew dmu_objset_evict(ds->ds_objset); 253 789 ahrens 254 789 ahrens if (ds->ds_prev) { 255 6689 maybee dsl_dataset_drop_ref(ds->ds_prev, ds); 256 789 ahrens ds->ds_prev = NULL; 257 789 ahrens } 258 789 ahrens 259 789 ahrens bplist_close(&ds->ds_deadlist); 260 6689 maybee if (ds->ds_dir) 261 6689 maybee dsl_dir_close(ds->ds_dir, ds); 262 789 ahrens 263 4787 ahrens ASSERT(!list_link_active(&ds->ds_synced_link)); 264 789 ahrens 265 2856 nd150628 mutex_destroy(&ds->ds_lock); 266 10204 Matthew mutex_destroy(&ds->ds_recvlock); 267 4787 ahrens mutex_destroy(&ds->ds_opening_lock); 268 6689 maybee rw_destroy(&ds->ds_rwlock); 269 6689 maybee cv_destroy(&ds->ds_exclusive_cv); 270 10922 Jeff bplist_fini(&ds->ds_deadlist); 271 2856 
nd150628 272 789 ahrens kmem_free(ds, sizeof (dsl_dataset_t)); 273 789 ahrens } 274 789 ahrens 275 1544 eschrock static int 276 789 ahrens dsl_dataset_get_snapname(dsl_dataset_t *ds) 277 789 ahrens { 278 789 ahrens dsl_dataset_phys_t *headphys; 279 789 ahrens int err; 280 789 ahrens dmu_buf_t *headdbuf; 281 789 ahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 282 789 ahrens objset_t *mos = dp->dp_meta_objset; 283 789 ahrens 284 789 ahrens if (ds->ds_snapname[0]) 285 1544 eschrock return (0); 286 789 ahrens if (ds->ds_phys->ds_next_snap_obj == 0) 287 1544 eschrock return (0); 288 789 ahrens 289 1544 eschrock err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 290 1544 eschrock FTAG, &headdbuf); 291 1544 eschrock if (err) 292 1544 eschrock return (err); 293 789 ahrens headphys = headdbuf->db_data; 294 789 ahrens err = zap_value_search(dp->dp_meta_objset, 295 4577 ahrens headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 296 1544 eschrock dmu_buf_rele(headdbuf, FTAG); 297 1544 eschrock return (err); 298 789 ahrens } 299 789 ahrens 300 6492 timh static int 301 6689 maybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 302 6492 timh { 303 6689 maybee objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 304 6689 maybee uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 305 6492 timh matchtype_t mt; 306 6492 timh int err; 307 6492 timh 308 6689 maybee if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 309 6492 timh mt = MT_FIRST; 310 6492 timh else 311 6492 timh mt = MT_EXACT; 312 6492 timh 313 6689 maybee err = zap_lookup_norm(mos, snapobj, name, 8, 1, 314 6492 timh value, mt, NULL, 0, NULL); 315 6492 timh if (err == ENOTSUP && mt == MT_FIRST) 316 6689 maybee err = zap_lookup(mos, snapobj, name, 8, 1, value); 317 6492 timh return (err); 318 6492 timh } 319 6492 timh 320 6492 timh static int 321 6689 maybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 322 6492 timh { 323 6689 maybee objset_t 
*mos = ds->ds_dir->dd_pool->dp_meta_objset; 324 6689 maybee uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 325 6492 timh matchtype_t mt; 326 6492 timh int err; 327 10373 chris 328 10373 chris dsl_dir_snap_cmtime_update(ds->ds_dir); 329 6492 timh 330 6689 maybee if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 331 6492 timh mt = MT_FIRST; 332 6492 timh else 333 6492 timh mt = MT_EXACT; 334 6492 timh 335 6689 maybee err = zap_remove_norm(mos, snapobj, name, mt, tx); 336 6492 timh if (err == ENOTSUP && mt == MT_FIRST) 337 6689 maybee err = zap_remove(mos, snapobj, name, tx); 338 6492 timh return (err); 339 6492 timh } 340 6492 timh 341 6689 maybee static int 342 6689 maybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 343 6689 maybee dsl_dataset_t **dsp) 344 789 ahrens { 345 789 ahrens objset_t *mos = dp->dp_meta_objset; 346 789 ahrens dmu_buf_t *dbuf; 347 789 ahrens dsl_dataset_t *ds; 348 1544 eschrock int err; 349 789 ahrens 350 789 ahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 351 789 ahrens dsl_pool_sync_context(dp)); 352 789 ahrens 353 1544 eschrock err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 354 1544 eschrock if (err) 355 1544 eschrock return (err); 356 789 ahrens ds = dmu_buf_get_user(dbuf); 357 789 ahrens if (ds == NULL) { 358 789 ahrens dsl_dataset_t *winner; 359 789 ahrens 360 789 ahrens ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 361 789 ahrens ds->ds_dbuf = dbuf; 362 789 ahrens ds->ds_object = dsobj; 363 789 ahrens ds->ds_phys = dbuf->db_data; 364 789 ahrens 365 2856 nd150628 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 366 10204 Matthew mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); 367 4787 ahrens mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 368 6689 maybee rw_init(&ds->ds_rwlock, 0, 0, 0); 369 6689 maybee cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 370 10922 Jeff bplist_init(&ds->ds_deadlist); 371 2856 nd150628 372 1544 eschrock err = bplist_open(&ds->ds_deadlist, 373 
789 ahrens mos, ds->ds_phys->ds_deadlist_obj); 374 1544 eschrock if (err == 0) { 375 1544 eschrock err = dsl_dir_open_obj(dp, 376 1544 eschrock ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 377 1544 eschrock } 378 1544 eschrock if (err) { 379 1544 eschrock /* 380 1544 eschrock * we don't really need to close the blist if we 381 1544 eschrock * just opened it. 382 1544 eschrock */ 383 2856 nd150628 mutex_destroy(&ds->ds_lock); 384 10204 Matthew mutex_destroy(&ds->ds_recvlock); 385 4787 ahrens mutex_destroy(&ds->ds_opening_lock); 386 6689 maybee rw_destroy(&ds->ds_rwlock); 387 6689 maybee cv_destroy(&ds->ds_exclusive_cv); 388 10922 Jeff bplist_fini(&ds->ds_deadlist); 389 1544 eschrock kmem_free(ds, sizeof (dsl_dataset_t)); 390 1544 eschrock dmu_buf_rele(dbuf, tag); 391 1544 eschrock return (err); 392 1544 eschrock } 393 789 ahrens 394 7390 Matthew if (!dsl_dataset_is_snapshot(ds)) { 395 789 ahrens ds->ds_snapname[0] = '\0'; 396 789 ahrens if (ds->ds_phys->ds_prev_snap_obj) { 397 6689 maybee err = dsl_dataset_get_ref(dp, 398 6689 maybee ds->ds_phys->ds_prev_snap_obj, 399 6689 maybee ds, &ds->ds_prev); 400 789 ahrens } 401 7390 Matthew 402 7390 Matthew if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) { 403 7390 Matthew dsl_dataset_t *origin; 404 7390 Matthew 405 7390 Matthew err = dsl_dataset_hold_obj(dp, 406 7390 Matthew ds->ds_dir->dd_phys->dd_origin_obj, 407 7390 Matthew FTAG, &origin); 408 7390 Matthew if (err == 0) { 409 7390 Matthew ds->ds_origin_txg = 410 7390 Matthew origin->ds_phys->ds_creation_txg; 411 7390 Matthew dsl_dataset_rele(origin, FTAG); 412 7390 Matthew } 413 7390 Matthew } 414 10242 chris } else { 415 10242 chris if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 416 10242 chris err = dsl_dataset_get_snapname(ds); 417 10242 chris if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 418 10242 chris err = zap_count( 419 10242 chris ds->ds_dir->dd_pool->dp_meta_objset, 420 10242 chris ds->ds_phys->ds_userrefs_obj, 421 10242 chris &ds->ds_userrefs); 422 10242 chris 
} 423 789 ahrens } 424 789 ahrens 425 7390 Matthew if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 426 5569 ck153898 /* 427 5569 ck153898 * In sync context, we're called with either no lock 428 5569 ck153898 * or with the write lock. If we're not syncing, 429 5569 ck153898 * we're always called with the read lock held. 430 5569 ck153898 */ 431 5475 ck153898 boolean_t need_lock = 432 5569 ck153898 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 433 5569 ck153898 dsl_pool_sync_context(dp); 434 5475 ck153898 435 5475 ck153898 if (need_lock) 436 5475 ck153898 rw_enter(&dp->dp_config_rwlock, RW_READER); 437 5475 ck153898 438 7265 ahrens err = dsl_prop_get_ds(ds, 439 5475 ck153898 "refreservation", sizeof (uint64_t), 1, 440 5475 ck153898 &ds->ds_reserved, NULL); 441 5475 ck153898 if (err == 0) { 442 7265 ahrens err = dsl_prop_get_ds(ds, 443 5475 ck153898 "refquota", sizeof (uint64_t), 1, 444 5475 ck153898 &ds->ds_quota, NULL); 445 5475 ck153898 } 446 5475 ck153898 447 5475 ck153898 if (need_lock) 448 5475 ck153898 rw_exit(&dp->dp_config_rwlock); 449 5475 ck153898 } else { 450 5475 ck153898 ds->ds_reserved = ds->ds_quota = 0; 451 5475 ck153898 } 452 5475 ck153898 453 1544 eschrock if (err == 0) { 454 1544 eschrock winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 455 1544 eschrock dsl_dataset_evict); 456 1544 eschrock } 457 1544 eschrock if (err || winner) { 458 789 ahrens bplist_close(&ds->ds_deadlist); 459 6689 maybee if (ds->ds_prev) 460 6689 maybee dsl_dataset_drop_ref(ds->ds_prev, ds); 461 789 ahrens dsl_dir_close(ds->ds_dir, ds); 462 2856 nd150628 mutex_destroy(&ds->ds_lock); 463 10204 Matthew mutex_destroy(&ds->ds_recvlock); 464 4787 ahrens mutex_destroy(&ds->ds_opening_lock); 465 6689 maybee rw_destroy(&ds->ds_rwlock); 466 6689 maybee cv_destroy(&ds->ds_exclusive_cv); 467 10922 Jeff bplist_fini(&ds->ds_deadlist); 468 789 ahrens kmem_free(ds, sizeof (dsl_dataset_t)); 469 1544 eschrock if (err) { 470 1544 eschrock dmu_buf_rele(dbuf, tag); 471 1544 eschrock return (err); 
472 1544 eschrock } 473 789 ahrens ds = winner; 474 789 ahrens } else { 475 4787 ahrens ds->ds_fsid_guid = 476 789 ahrens unique_insert(ds->ds_phys->ds_fsid_guid); 477 5378 ck153898 } 478 789 ahrens } 479 789 ahrens ASSERT3P(ds->ds_dbuf, ==, dbuf); 480 789 ahrens ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 481 7046 ahrens ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 482 7061 ahrens spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 483 7077 ahrens dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 484 789 ahrens mutex_enter(&ds->ds_lock); 485 6689 maybee if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 486 789 ahrens mutex_exit(&ds->ds_lock); 487 6689 maybee dmu_buf_rele(ds->ds_dbuf, tag); 488 6689 maybee return (ENOENT); 489 789 ahrens } 490 789 ahrens mutex_exit(&ds->ds_lock); 491 1544 eschrock *dsp = ds; 492 1544 eschrock return (0); 493 789 ahrens } 494 789 ahrens 495 6689 maybee static int 496 6689 maybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 497 6689 maybee { 498 6689 maybee dsl_pool_t *dp = ds->ds_dir->dd_pool; 499 6689 maybee 500 6689 maybee /* 501 6689 maybee * In syncing context we don't want the rwlock lock: there 502 6689 maybee * may be an existing writer waiting for sync phase to 503 6689 maybee * finish. We don't need to worry about such writers, since 504 6689 maybee * sync phase is single-threaded, so the writer can't be 505 6689 maybee * doing anything while we are active. 506 6689 maybee */ 507 6689 maybee if (dsl_pool_sync_context(dp)) { 508 6689 maybee ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 509 6689 maybee return (0); 510 6689 maybee } 511 6689 maybee 512 6689 maybee /* 513 6689 maybee * Normal users will hold the ds_rwlock as a READER until they 514 6689 maybee * are finished (i.e., call dsl_dataset_rele()). "Owners" will 515 6689 maybee * drop their READER lock after they set the ds_owner field. 
516 6689 maybee * 517 6689 maybee * If the dataset is being destroyed, the destroy thread will 518 6689 maybee * obtain a WRITER lock for exclusive access after it's done its 519 6689 maybee * open-context work and then change the ds_owner to 520 6689 maybee * dsl_reaper once destruction is assured. So threads 521 6689 maybee * may block here temporarily, until the "destructability" of 522 6689 maybee * the dataset is determined. 523 6689 maybee */ 524 6689 maybee ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 525 6689 maybee mutex_enter(&ds->ds_lock); 526 6689 maybee while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 527 6689 maybee rw_exit(&dp->dp_config_rwlock); 528 6689 maybee cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 529 6689 maybee if (DSL_DATASET_IS_DESTROYED(ds)) { 530 6689 maybee mutex_exit(&ds->ds_lock); 531 6689 maybee dsl_dataset_drop_ref(ds, tag); 532 6689 maybee rw_enter(&dp->dp_config_rwlock, RW_READER); 533 6689 maybee return (ENOENT); 534 6689 maybee } 535 6689 maybee rw_enter(&dp->dp_config_rwlock, RW_READER); 536 6689 maybee } 537 6689 maybee mutex_exit(&ds->ds_lock); 538 6689 maybee return (0); 539 6689 maybee } 540 6689 maybee 541 789 ahrens int 542 6689 maybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 543 6689 maybee dsl_dataset_t **dsp) 544 6689 maybee { 545 6689 maybee int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 546 6689 maybee 547 6689 maybee if (err) 548 6689 maybee return (err); 549 6689 maybee return (dsl_dataset_hold_ref(*dsp, tag)); 550 6689 maybee } 551 6689 maybee 552 6689 maybee int 553 10298 Matthew dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, 554 10298 Matthew void *tag, dsl_dataset_t **dsp) 555 6689 maybee { 556 10298 Matthew int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 557 6689 maybee if (err) 558 6689 maybee return (err); 559 10298 Matthew if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 560 10298 Matthew dsl_dataset_rele(*dsp, tag); 561 8779 Mark 
*dsp = NULL; 562 6689 maybee return (EBUSY); 563 6689 maybee } 564 6689 maybee return (0); 565 6689 maybee } 566 6689 maybee 567 6689 maybee int 568 6689 maybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 569 789 ahrens { 570 789 ahrens dsl_dir_t *dd; 571 789 ahrens dsl_pool_t *dp; 572 6689 maybee const char *snapname; 573 789 ahrens uint64_t obj; 574 789 ahrens int err = 0; 575 789 ahrens 576 6689 maybee err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 577 1544 eschrock if (err) 578 1544 eschrock return (err); 579 789 ahrens 580 789 ahrens dp = dd->dd_pool; 581 789 ahrens obj = dd->dd_phys->dd_head_dataset_obj; 582 789 ahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 583 6689 maybee if (obj) 584 6689 maybee err = dsl_dataset_get_ref(dp, obj, tag, dsp); 585 6689 maybee else 586 789 ahrens err = ENOENT; 587 6689 maybee if (err) 588 789 ahrens goto out; 589 789 ahrens 590 6689 maybee err = dsl_dataset_hold_ref(*dsp, tag); 591 789 ahrens 592 6689 maybee /* we may be looking for a snapshot */ 593 6689 maybee if (err == 0 && snapname != NULL) { 594 6689 maybee dsl_dataset_t *ds = NULL; 595 789 ahrens 596 6689 maybee if (*snapname++ != '@') { 597 6689 maybee dsl_dataset_rele(*dsp, tag); 598 789 ahrens err = ENOENT; 599 789 ahrens goto out; 600 789 ahrens } 601 789 ahrens 602 6689 maybee dprintf("looking for snapshot '%s'\n", snapname); 603 6689 maybee err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 604 6689 maybee if (err == 0) 605 6689 maybee err = dsl_dataset_get_ref(dp, obj, tag, &ds); 606 6689 maybee dsl_dataset_rele(*dsp, tag); 607 6689 maybee 608 6689 maybee ASSERT3U((err == 0), ==, (ds != NULL)); 609 6689 maybee 610 6689 maybee if (ds) { 611 6689 maybee mutex_enter(&ds->ds_lock); 612 6689 maybee if (ds->ds_snapname[0] == 0) 613 6689 maybee (void) strlcpy(ds->ds_snapname, snapname, 614 6689 maybee sizeof (ds->ds_snapname)); 615 6689 maybee mutex_exit(&ds->ds_lock); 616 6689 maybee err = dsl_dataset_hold_ref(ds, tag); 617 6689 
maybee *dsp = err ? NULL : ds; 618 789 ahrens } 619 789 ahrens } 620 789 ahrens out: 621 789 ahrens rw_exit(&dp->dp_config_rwlock); 622 789 ahrens dsl_dir_close(dd, FTAG); 623 789 ahrens return (err); 624 789 ahrens } 625 789 ahrens 626 789 ahrens int 627 10298 Matthew dsl_dataset_own(const char *name, boolean_t inconsistentok, 628 10298 Matthew void *tag, dsl_dataset_t **dsp) 629 789 ahrens { 630 10298 Matthew int err = dsl_dataset_hold(name, tag, dsp); 631 6689 maybee if (err) 632 6689 maybee return (err); 633 10298 Matthew if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 634 10298 Matthew dsl_dataset_rele(*dsp, tag); 635 6689 maybee return (EBUSY); 636 6689 maybee } 637 6689 maybee return (0); 638 789 ahrens } 639 789 ahrens 640 789 ahrens void 641 789 ahrens dsl_dataset_name(dsl_dataset_t *ds, char *name) 642 789 ahrens { 643 789 ahrens if (ds == NULL) { 644 789 ahrens (void) strcpy(name, "mos"); 645 789 ahrens } else { 646 789 ahrens dsl_dir_name(ds->ds_dir, name); 647 1544 eschrock VERIFY(0 == dsl_dataset_get_snapname(ds)); 648 789 ahrens if (ds->ds_snapname[0]) { 649 789 ahrens (void) strcat(name, "@"); 650 6689 maybee /* 651 6689 maybee * We use a "recursive" mutex so that we 652 6689 maybee * can call dprintf_ds() with ds_lock held. 
653 6689 maybee */ 654 789 ahrens if (!MUTEX_HELD(&ds->ds_lock)) { 655 789 ahrens mutex_enter(&ds->ds_lock); 656 789 ahrens (void) strcat(name, ds->ds_snapname); 657 789 ahrens mutex_exit(&ds->ds_lock); 658 789 ahrens } else { 659 789 ahrens (void) strcat(name, ds->ds_snapname); 660 789 ahrens } 661 789 ahrens } 662 789 ahrens } 663 3978 mmusante } 664 3978 mmusante 665 3978 mmusante static int 666 3978 mmusante dsl_dataset_namelen(dsl_dataset_t *ds) 667 3978 mmusante { 668 3978 mmusante int result; 669 3978 mmusante 670 3978 mmusante if (ds == NULL) { 671 3978 mmusante result = 3; /* "mos" */ 672 3978 mmusante } else { 673 3978 mmusante result = dsl_dir_namelen(ds->ds_dir); 674 3978 mmusante VERIFY(0 == dsl_dataset_get_snapname(ds)); 675 3978 mmusante if (ds->ds_snapname[0]) { 676 3978 mmusante ++result; /* adding one for the @-sign */ 677 3978 mmusante if (!MUTEX_HELD(&ds->ds_lock)) { 678 3978 mmusante mutex_enter(&ds->ds_lock); 679 3978 mmusante result += strlen(ds->ds_snapname); 680 3978 mmusante mutex_exit(&ds->ds_lock); 681 3978 mmusante } else { 682 3978 mmusante result += strlen(ds->ds_snapname); 683 3978 mmusante } 684 3978 mmusante } 685 3978 mmusante } 686 3978 mmusante 687 3978 mmusante return (result); 688 789 ahrens } 689 789 ahrens 690 7046 ahrens void 691 6689 maybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 692 789 ahrens { 693 1544 eschrock dmu_buf_rele(ds->ds_dbuf, tag); 694 789 ahrens } 695 789 ahrens 696 789 ahrens void 697 6689 maybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 698 5367 ahrens { 699 6689 maybee if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 700 6689 maybee rw_exit(&ds->ds_rwlock); 701 6689 maybee } 702 6689 maybee dsl_dataset_drop_ref(ds, tag); 703 6689 maybee } 704 6689 maybee 705 6689 maybee void 706 10298 Matthew dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 707 6689 maybee { 708 10298 Matthew ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || 709 6689 maybee (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == 
NULL)); 710 6689 maybee 711 5367 ahrens mutex_enter(&ds->ds_lock); 712 6689 maybee ds->ds_owner = NULL; 713 6689 maybee if (RW_WRITE_HELD(&ds->ds_rwlock)) { 714 6689 maybee rw_exit(&ds->ds_rwlock); 715 6689 maybee cv_broadcast(&ds->ds_exclusive_cv); 716 6689 maybee } 717 5367 ahrens mutex_exit(&ds->ds_lock); 718 6689 maybee if (ds->ds_dbuf) 719 10298 Matthew dsl_dataset_drop_ref(ds, tag); 720 6689 maybee else 721 6689 maybee dsl_dataset_evict(ds->ds_dbuf, ds); 722 5367 ahrens } 723 5367 ahrens 724 5367 ahrens boolean_t 725 10298 Matthew dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) 726 5367 ahrens { 727 6689 maybee boolean_t gotit = FALSE; 728 6689 maybee 729 5367 ahrens mutex_enter(&ds->ds_lock); 730 6689 maybee if (ds->ds_owner == NULL && 731 6689 maybee (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 732 10298 Matthew ds->ds_owner = tag; 733 6689 maybee if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 734 6689 maybee rw_exit(&ds->ds_rwlock); 735 6689 maybee gotit = TRUE; 736 5367 ahrens } 737 5367 ahrens mutex_exit(&ds->ds_lock); 738 6689 maybee return (gotit); 739 6689 maybee } 740 6689 maybee 741 6689 maybee void 742 6689 maybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 743 6689 maybee { 744 6689 maybee ASSERT3P(owner, ==, ds->ds_owner); 745 6689 maybee if (!RW_WRITE_HELD(&ds->ds_rwlock)) 746 6689 maybee rw_enter(&ds->ds_rwlock, RW_WRITER); 747 5367 ahrens } 748 5367 ahrens 749 2199 ahrens uint64_t 750 7046 ahrens dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 751 6492 timh uint64_t flags, dmu_tx_t *tx) 752 789 ahrens { 753 5367 ahrens dsl_pool_t *dp = dd->dd_pool; 754 789 ahrens dmu_buf_t *dbuf; 755 789 ahrens dsl_dataset_phys_t *dsphys; 756 5367 ahrens uint64_t dsobj; 757 789 ahrens objset_t *mos = dp->dp_meta_objset; 758 7046 ahrens 759 7046 ahrens if (origin == NULL) 760 7046 ahrens origin = dp->dp_origin_snap; 761 789 ahrens 762 5367 ahrens ASSERT(origin == NULL || origin->ds_dir->dd_pool == 
dp); 763 5367 ahrens ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 764 789 ahrens ASSERT(dmu_tx_is_syncing(tx)); 765 5367 ahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 766 789 ahrens 767 928 tabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 768 928 tabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 769 1544 eschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 770 789 ahrens dmu_buf_will_dirty(dbuf, tx); 771 789 ahrens dsphys = dbuf->db_data; 772 6689 maybee bzero(dsphys, sizeof (dsl_dataset_phys_t)); 773 789 ahrens dsphys->ds_dir_obj = dd->dd_object; 774 6492 timh dsphys->ds_flags = flags; 775 789 ahrens dsphys->ds_fsid_guid = unique_create(); 776 789 ahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 777 789 ahrens sizeof (dsphys->ds_guid)); 778 789 ahrens dsphys->ds_snapnames_zapobj = 779 6492 timh zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 780 6492 timh DMU_OT_NONE, 0, tx); 781 789 ahrens dsphys->ds_creation_time = gethrestime_sec(); 782 7046 ahrens dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; 783 789 ahrens dsphys->ds_deadlist_obj = 784 789 ahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 785 5378 ck153898 786 5367 ahrens if (origin) { 787 5367 ahrens dsphys->ds_prev_snap_obj = origin->ds_object; 788 789 ahrens dsphys->ds_prev_snap_txg = 789 5367 ahrens origin->ds_phys->ds_creation_txg; 790 789 ahrens dsphys->ds_used_bytes = 791 5367 ahrens origin->ds_phys->ds_used_bytes; 792 789 ahrens dsphys->ds_compressed_bytes = 793 5367 ahrens origin->ds_phys->ds_compressed_bytes; 794 789 ahrens dsphys->ds_uncompressed_bytes = 795 5367 ahrens origin->ds_phys->ds_uncompressed_bytes; 796 5367 ahrens dsphys->ds_bp = origin->ds_phys->ds_bp; 797 6502 timh dsphys->ds_flags |= origin->ds_phys->ds_flags; 798 789 ahrens 799 5367 ahrens dmu_buf_will_dirty(origin->ds_dbuf, tx); 800 5367 ahrens origin->ds_phys->ds_num_children++; 801 7046 ahrens 802 7046 ahrens if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 803 7046 ahrens if (origin->ds_phys->ds_next_clones_obj == 0) { 804 7046 ahrens origin->ds_phys->ds_next_clones_obj = 805 7046 ahrens zap_create(mos, 806 7046 ahrens DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 807 7046 ahrens } 808 7046 ahrens VERIFY(0 == zap_add_int(mos, 809 7046 ahrens origin->ds_phys->ds_next_clones_obj, 810 7046 ahrens dsobj, tx)); 811 7046 ahrens } 812 789 ahrens 813 789 ahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 814 5367 ahrens dd->dd_phys->dd_origin_obj = origin->ds_object; 815 789 ahrens } 816 6492 timh 817 6492 timh if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 818 6492 timh dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 819 6492 timh 820 1544 eschrock dmu_buf_rele(dbuf, FTAG); 821 789 ahrens 822 789 ahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 823 789 ahrens dd->dd_phys->dd_head_dataset_obj = dsobj; 824 5367 ahrens 825 5367 ahrens return (dsobj); 826 5367 ahrens } 827 5367 ahrens 828 5367 ahrens uint64_t 829 6492 timh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 830 6492 timh dsl_dataset_t 
*origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 831 5367 ahrens { 832 5367 ahrens dsl_pool_t *dp = pdd->dd_pool; 833 5367 ahrens uint64_t dsobj, ddobj; 834 5367 ahrens dsl_dir_t *dd; 835 5367 ahrens 836 5367 ahrens ASSERT(lastname[0] != '@'); 837 5367 ahrens 838 7046 ahrens ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 839 5367 ahrens VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 840 5367 ahrens 841 7046 ahrens dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 842 5367 ahrens 843 5367 ahrens dsl_deleg_set_create_perms(dd, tx, cr); 844 5367 ahrens 845 789 ahrens dsl_dir_close(dd, FTAG); 846 789 ahrens 847 2199 ahrens return (dsobj); 848 2199 ahrens } 849 2199 ahrens 850 2199 ahrens struct destroyarg { 851 2199 ahrens dsl_sync_task_group_t *dstg; 852 2199 ahrens char *snapname; 853 2199 ahrens char *failed; 854 10242 chris boolean_t defer; 855 2199 ahrens }; 856 2199 ahrens 857 2199 ahrens static int 858 2199 ahrens dsl_snapshot_destroy_one(char *name, void *arg) 859 2199 ahrens { 860 2199 ahrens struct destroyarg *da = arg; 861 2199 ahrens dsl_dataset_t *ds; 862 2199 ahrens int err; 863 10242 chris char *dsname; 864 2199 ahrens 865 10272 Matthew dsname = kmem_asprintf("%s@%s", name, da->snapname); 866 10298 Matthew err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); 867 10272 Matthew strfree(dsname); 868 6689 maybee if (err == 0) { 869 10242 chris struct dsl_ds_destroyarg *dsda; 870 10242 chris 871 6689 maybee dsl_dataset_make_exclusive(ds, da->dstg); 872 10298 Matthew if (ds->ds_objset != NULL) { 873 10298 Matthew dmu_objset_evict(ds->ds_objset); 874 10298 Matthew ds->ds_objset = NULL; 875 7237 ek110237 } 876 10242 chris dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); 877 10242 chris dsda->ds = ds; 878 10242 chris dsda->defer = da->defer; 879 6689 maybee dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 880 10242 chris dsl_dataset_destroy_sync, dsda, da->dstg, 0); 881 6689 maybee } else if (err == 
ENOENT) { 882 6689 maybee err = 0; 883 6689 maybee } else { 884 2199 ahrens (void) strcpy(da->failed, name); 885 2199 ahrens } 886 6689 maybee return (err); 887 2199 ahrens } 888 2199 ahrens 889 2199 ahrens /* 890 2199 ahrens * Destroy 'snapname' in all descendants of 'fsname'. 891 2199 ahrens */ 892 2199 ahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 893 2199 ahrens int 894 10242 chris dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) 895 2199 ahrens { 896 2199 ahrens int err; 897 2199 ahrens struct destroyarg da; 898 2199 ahrens dsl_sync_task_t *dst; 899 2199 ahrens spa_t *spa; 900 2199 ahrens 901 4603 ahrens err = spa_open(fsname, &spa, FTAG); 902 2199 ahrens if (err) 903 2199 ahrens return (err); 904 2199 ahrens da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 905 2199 ahrens da.snapname = snapname; 906 2199 ahrens da.failed = fsname; 907 10242 chris da.defer = defer; 908 2199 ahrens 909 2199 ahrens err = dmu_objset_find(fsname, 910 2417 ahrens dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 911 2199 ahrens 912 2199 ahrens if (err == 0) 913 2199 ahrens err = dsl_sync_task_group_wait(da.dstg); 914 2199 ahrens 915 2199 ahrens for (dst = list_head(&da.dstg->dstg_tasks); dst; 916 2199 ahrens dst = list_next(&da.dstg->dstg_tasks, dst)) { 917 10242 chris struct dsl_ds_destroyarg *dsda = dst->dst_arg1; 918 10242 chris dsl_dataset_t *ds = dsda->ds; 919 10242 chris 920 6689 maybee /* 921 6689 maybee * Return the file system name that triggered the error 922 6689 maybee */ 923 2199 ahrens if (dst->dst_err) { 924 2199 ahrens dsl_dataset_name(ds, fsname); 925 4603 ahrens *strchr(fsname, '@') = '\0'; 926 2199 ahrens } 927 10242 chris ASSERT3P(dsda->rm_origin, ==, NULL); 928 6689 maybee dsl_dataset_disown(ds, da.dstg); 929 10242 chris kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); 930 2199 ahrens } 931 2199 ahrens 932 2199 ahrens dsl_sync_task_group_destroy(da.dstg); 933 2199 ahrens spa_close(spa, FTAG); 934 2199 ahrens 
return (err); 935 789 ahrens } 936 789 ahrens 937 10242 chris static boolean_t 938 10242 chris dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) 939 10242 chris { 940 10242 chris boolean_t might_destroy = B_FALSE; 941 10242 chris 942 10242 chris mutex_enter(&ds->ds_lock); 943 10242 chris if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && 944 10242 chris DS_IS_DEFER_DESTROY(ds)) 945 10242 chris might_destroy = B_TRUE; 946 10242 chris mutex_exit(&ds->ds_lock); 947 10242 chris 948 10242 chris return (might_destroy); 949 10242 chris } 950 10242 chris 951 10242 chris /* 952 10242 chris * If we're removing a clone, and these three conditions are true: 953 10242 chris * 1) the clone's origin has no other children 954 10242 chris * 2) the clone's origin has no user references 955 10242 chris * 3) the clone's origin has been marked for deferred destruction 956 10242 chris * Then, prepare to remove the origin as part of this sync task group. 957 10242 chris */ 958 10242 chris static int 959 10242 chris dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) 960 10242 chris { 961 10242 chris dsl_dataset_t *ds = dsda->ds; 962 10242 chris dsl_dataset_t *origin = ds->ds_prev; 963 10242 chris 964 10242 chris if (dsl_dataset_might_destroy_origin(origin)) { 965 10242 chris char *name; 966 10242 chris int namelen; 967 10242 chris int error; 968 10242 chris 969 10242 chris namelen = dsl_dataset_namelen(origin) + 1; 970 10242 chris name = kmem_alloc(namelen, KM_SLEEP); 971 10242 chris dsl_dataset_name(origin, name); 972 10242 chris #ifdef _KERNEL 973 10242 chris error = zfs_unmount_snap(name, NULL); 974 10242 chris if (error) { 975 10242 chris kmem_free(name, namelen); 976 10242 chris return (error); 977 10242 chris } 978 10242 chris #endif 979 10298 Matthew error = dsl_dataset_own(name, B_TRUE, tag, &origin); 980 10242 chris kmem_free(name, namelen); 981 10242 chris if (error) 982 10242 chris return (error); 983 10242 chris dsda->rm_origin = origin; 984 
10242 chris dsl_dataset_make_exclusive(origin, tag); 985 10342 chris 986 10342 chris if (origin->ds_objset != NULL) { 987 10342 chris dmu_objset_evict(origin->ds_objset); 988 10342 chris origin->ds_objset = NULL; 989 10342 chris } 990 10242 chris } 991 10242 chris 992 10242 chris return (0); 993 10242 chris } 994 10242 chris 995 5367 ahrens /* 996 6689 maybee * ds must be opened as OWNER. On return (whether successful or not), 997 6689 maybee * ds will be closed and caller can no longer dereference it. 998 5367 ahrens */ 999 789 ahrens int 1000 10242 chris dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) 1001 789 ahrens { 1002 789 ahrens int err; 1003 2199 ahrens dsl_sync_task_group_t *dstg; 1004 2199 ahrens objset_t *os; 1005 789 ahrens dsl_dir_t *dd; 1006 2199 ahrens uint64_t obj; 1007 11022 Tom struct dsl_ds_destroyarg dsda = { 0 }; 1008 11022 Tom dsl_dataset_t dummy_ds = { 0 }; 1009 10242 chris 1010 10242 chris dsda.ds = ds; 1011 789 ahrens 1012 5367 ahrens if (dsl_dataset_is_snapshot(ds)) { 1013 2199 ahrens /* Destroying a snapshot is simpler */ 1014 6689 maybee dsl_dataset_make_exclusive(ds, tag); 1015 7237 ek110237 1016 10298 Matthew if (ds->ds_objset != NULL) { 1017 10298 Matthew dmu_objset_evict(ds->ds_objset); 1018 10298 Matthew ds->ds_objset = NULL; 1019 7237 ek110237 } 1020 10242 chris dsda.defer = defer; 1021 2199 ahrens err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1022 2199 ahrens dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 1023 10242 chris &dsda, tag, 0); 1024 10242 chris ASSERT3P(dsda.rm_origin, ==, NULL); 1025 10385 chris goto out; 1026 10385 chris } else if (defer) { 1027 10385 chris err = EINVAL; 1028 5367 ahrens goto out; 1029 2199 ahrens } 1030 2199 ahrens 1031 2199 ahrens dd = ds->ds_dir; 1032 11022 Tom dummy_ds.ds_dir = dd; 1033 11022 Tom dummy_ds.ds_object = ds->ds_object; 1034 2199 ahrens 1035 2199 ahrens /* 1036 2199 ahrens * Check for errors and mark this ds as inconsistent, in 1037 2199 ahrens * case we crash 
while freeing the objects. 1038 2199 ahrens */ 1039 2199 ahrens err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 1040 2199 ahrens dsl_dataset_destroy_begin_sync, ds, NULL, 0); 1041 5367 ahrens if (err) 1042 5367 ahrens goto out; 1043 5367 ahrens 1044 10298 Matthew err = dmu_objset_from_ds(ds, &os); 1045 5367 ahrens if (err) 1046 5367 ahrens goto out; 1047 2199 ahrens 1048 2199 ahrens /* 1049 2199 ahrens * remove the objects in open context, so that we won't 1050 2199 ahrens * have too much to do in syncing context. 1051 2199 ahrens */ 1052 3025 ahrens for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 1053 3025 ahrens ds->ds_phys->ds_prev_snap_txg)) { 1054 6992 maybee /* 1055 6992 maybee * Ignore errors, if there is not enough disk space 1056 6992 maybee * we will deal with it in dsl_dataset_destroy_sync(). 1057 6992 maybee */ 1058 6992 maybee (void) dmu_free_object(os, obj); 1059 2199 ahrens } 1060 2199 ahrens 1061 9396 Matthew /* 1062 9396 Matthew * We need to sync out all in-flight IO before we try to evict 1063 9396 Matthew * (the dataset evict func is trying to clear the cached entries 1064 9396 Matthew * for this dataset in the ARC). 1065 9396 Matthew */ 1066 9396 Matthew txg_wait_synced(dd->dd_pool, 0); 1067 9396 Matthew 1068 9396 Matthew /* 1069 9396 Matthew * If we managed to free all the objects in open 1070 9396 Matthew * context, the user space accounting should be zero. 
1071 9396 Matthew */ 1072 9396 Matthew if (ds->ds_phys->ds_bp.blk_fill == 0 && 1073 10298 Matthew dmu_objset_userused_enabled(os)) { 1074 9396 Matthew uint64_t count; 1075 9396 Matthew 1076 9396 Matthew ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || 1077 9396 Matthew count == 0); 1078 9396 Matthew ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || 1079 9396 Matthew count == 0); 1080 9396 Matthew } 1081 9396 Matthew 1082 2199 ahrens if (err != ESRCH) 1083 5367 ahrens goto out; 1084 2199 ahrens 1085 6975 maybee rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 1086 6975 maybee err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 1087 6975 maybee rw_exit(&dd->dd_pool->dp_config_rwlock); 1088 6975 maybee 1089 6975 maybee if (err) 1090 6975 maybee goto out; 1091 6975 maybee 1092 10298 Matthew if (ds->ds_objset) { 1093 6689 maybee /* 1094 6689 maybee * We need to sync out all in-flight IO before we try 1095 6689 maybee * to evict (the dataset evict func is trying to clear 1096 6689 maybee * the cached entries for this dataset in the ARC). 1097 6689 maybee */ 1098 6689 maybee txg_wait_synced(dd->dd_pool, 0); 1099 5367 ahrens } 1100 789 ahrens 1101 2199 ahrens /* 1102 2199 ahrens * Blow away the dsl_dir + head dataset. 1103 2199 ahrens */ 1104 6689 maybee dsl_dataset_make_exclusive(ds, tag); 1105 10298 Matthew if (ds->ds_objset) { 1106 10298 Matthew dmu_objset_evict(ds->ds_objset); 1107 10298 Matthew ds->ds_objset = NULL; 1108 6975 maybee } 1109 10242 chris 1110 10242 chris /* 1111 10242 chris * If we're removing a clone, we might also need to remove its 1112 10242 chris * origin. 
1113 10242 chris */ 1114 10242 chris do { 1115 10242 chris dsda.need_prep = B_FALSE; 1116 10242 chris if (dsl_dir_is_clone(dd)) { 1117 10242 chris err = dsl_dataset_origin_rm_prep(&dsda, tag); 1118 10242 chris if (err) { 1119 10242 chris dsl_dir_close(dd, FTAG); 1120 10242 chris goto out; 1121 10242 chris } 1122 10242 chris } 1123 10242 chris 1124 10242 chris dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 1125 10242 chris dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 1126 10242 chris dsl_dataset_destroy_sync, &dsda, tag, 0); 1127 10242 chris dsl_sync_task_create(dstg, dsl_dir_destroy_check, 1128 11022 Tom dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); 1129 10242 chris err = dsl_sync_task_group_wait(dstg); 1130 10242 chris dsl_sync_task_group_destroy(dstg); 1131 10242 chris 1132 10242 chris /* 1133 10242 chris * We could be racing against 'zfs release' or 'zfs destroy -d' 1134 10242 chris * on the origin snap, in which case we can get EBUSY if we 1135 10242 chris * needed to destroy the origin snap but were not ready to 1136 10242 chris * do so. 
1137 10242 chris */ 1138 10242 chris if (dsda.need_prep) { 1139 10242 chris ASSERT(err == EBUSY); 1140 10242 chris ASSERT(dsl_dir_is_clone(dd)); 1141 10242 chris ASSERT(dsda.rm_origin == NULL); 1142 10242 chris } 1143 10242 chris } while (dsda.need_prep); 1144 10242 chris 1145 10242 chris if (dsda.rm_origin != NULL) 1146 10242 chris dsl_dataset_disown(dsda.rm_origin, tag); 1147 10242 chris 1148 6689 maybee /* if it is successful, dsl_dir_destroy_sync will close the dd */ 1149 5367 ahrens if (err) 1150 2199 ahrens dsl_dir_close(dd, FTAG); 1151 5367 ahrens out: 1152 6689 maybee dsl_dataset_disown(ds, tag); 1153 789 ahrens return (err); 1154 789 ahrens } 1155 789 ahrens 1156 3547 maybee blkptr_t * 1157 3547 maybee dsl_dataset_get_blkptr(dsl_dataset_t *ds) 1158 789 ahrens { 1159 3547 maybee return (&ds->ds_phys->ds_bp); 1160 789 ahrens } 1161 789 ahrens 1162 789 ahrens void 1163 789 ahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 1164 789 ahrens { 1165 789 ahrens ASSERT(dmu_tx_is_syncing(tx)); 1166 789 ahrens /* If it's the meta-objset, set dp_meta_rootbp */ 1167 789 ahrens if (ds == NULL) { 1168 789 ahrens tx->tx_pool->dp_meta_rootbp = *bp; 1169 789 ahrens } else { 1170 789 ahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1171 789 ahrens ds->ds_phys->ds_bp = *bp; 1172 789 ahrens } 1173 789 ahrens } 1174 789 ahrens 1175 789 ahrens spa_t * 1176 789 ahrens dsl_dataset_get_spa(dsl_dataset_t *ds) 1177 789 ahrens { 1178 789 ahrens return (ds->ds_dir->dd_pool->dp_spa); 1179 789 ahrens } 1180 789 ahrens 1181 789 ahrens void 1182 789 ahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1183 789 ahrens { 1184 789 ahrens dsl_pool_t *dp; 1185 789 ahrens 1186 789 ahrens if (ds == NULL) /* this is the meta-objset */ 1187 789 ahrens return; 1188 789 ahrens 1189 10298 Matthew ASSERT(ds->ds_objset != NULL); 1190 2885 ahrens 1191 2885 ahrens if (ds->ds_phys->ds_next_snap_obj != 0) 1192 2885 ahrens panic("dirtying snapshot!"); 1193 789 ahrens 1194 789 ahrens 
dp = ds->ds_dir->dd_pool; 1195 789 ahrens 1196 789 ahrens if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 1197 789 ahrens /* up the hold count until we can be written out */ 1198 789 ahrens dmu_buf_add_ref(ds->ds_dbuf, ds); 1199 789 ahrens } 1200 789 ahrens } 1201 789 ahrens 1202 5378 ck153898 /* 1203 5378 ck153898 * The unique space in the head dataset can be calculated by subtracting 1204 5378 ck153898 * the space used in the most recent snapshot, that is still being used 1205 5378 ck153898 * in this file system, from the space currently in use. To figure out 1206 5378 ck153898 * the space in the most recent snapshot still in use, we need to take 1207 5378 ck153898 * the total space used in the snapshot and subtract out the space that 1208 5378 ck153898 * has been freed up since the snapshot was taken. 1209 5378 ck153898 */ 1210 5378 ck153898 static void 1211 5378 ck153898 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1212 5378 ck153898 { 1213 5378 ck153898 uint64_t mrs_used; 1214 5378 ck153898 uint64_t dlused, dlcomp, dluncomp; 1215 5378 ck153898 1216 5378 ck153898 ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1217 5378 ck153898 1218 5378 ck153898 if (ds->ds_phys->ds_prev_snap_obj != 0) 1219 5378 ck153898 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1220 5378 ck153898 else 1221 5378 ck153898 mrs_used = 0; 1222 5378 ck153898 1223 5378 ck153898 VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1224 5378 ck153898 &dluncomp)); 1225 5378 ck153898 1226 5378 ck153898 ASSERT3U(dlused, <=, mrs_used); 1227 5378 ck153898 ds->ds_phys->ds_unique_bytes = 1228 5378 ck153898 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1229 5378 ck153898 1230 5378 ck153898 if (!DS_UNIQUE_IS_ACCURATE(ds) && 1231 5378 ck153898 spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1232 5378 ck153898 SPA_VERSION_UNIQUE_ACCURATE) 1233 5378 ck153898 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1234 5378 ck153898 } 1235 5378 ck153898 1236 5378 
ck153898 static uint64_t 1237 5378 ck153898 dsl_dataset_unique(dsl_dataset_t *ds) 1238 5378 ck153898 { 1239 5378 ck153898 if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1240 5378 ck153898 dsl_dataset_recalc_head_uniq(ds); 1241 5378 ck153898 1242 5378 ck153898 return (ds->ds_phys->ds_unique_bytes); 1243 5378 ck153898 } 1244 5378 ck153898 1245 789 ahrens struct killarg { 1246 7390 Matthew dsl_dataset_t *ds; 1247 789 ahrens dmu_tx_t *tx; 1248 789 ahrens }; 1249 789 ahrens 1250 7390 Matthew /* ARGSUSED */ 1251 789 ahrens static int 1252 10922 Jeff kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 1253 10922 Jeff const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1254 789 ahrens { 1255 789 ahrens struct killarg *ka = arg; 1256 10922 Jeff dmu_tx_t *tx = ka->tx; 1257 789 ahrens 1258 7837 Matthew if (bp == NULL) 1259 7837 Matthew return (0); 1260 789 ahrens 1261 10922 Jeff if (zb->zb_level == ZB_ZIL_LEVEL) { 1262 10922 Jeff ASSERT(zilog != NULL); 1263 8746 Matthew /* 1264 8746 Matthew * It's a block in the intent log. It has no 1265 8746 Matthew * accounting, so just free it. 1266 8746 Matthew */ 1267 10922 Jeff dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 1268 8746 Matthew } else { 1269 10922 Jeff ASSERT(zilog == NULL); 1270 8746 Matthew ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 1271 10922 Jeff (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 1272 8746 Matthew } 1273 7390 Matthew 1274 789 ahrens return (0); 1275 1731 bonwick } 1276 1731 bonwick 1277 1731 bonwick /* ARGSUSED */ 1278 1731 bonwick static int 1279 2199 ahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1280 1731 bonwick { 1281 2199 ahrens dsl_dataset_t *ds = arg1; 1282 5367 ahrens objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1283 5367 ahrens uint64_t count; 1284 5367 ahrens int err; 1285 1731 bonwick 1286 1731 bonwick /* 1287 1731 bonwick * Can't delete a head dataset if there are snapshots of it. 
1288 1731 bonwick * (Except if the only snapshots are from the branch we cloned 1289 1731 bonwick * from.) 1290 1731 bonwick */ 1291 1731 bonwick if (ds->ds_prev != NULL && 1292 1731 bonwick ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1293 10816 Vitezslav return (EBUSY); 1294 5367 ahrens 1295 5367 ahrens /* 1296 5367 ahrens * This is really a dsl_dir thing, but check it here so that 1297 5367 ahrens * we'll be less likely to leave this dataset inconsistent & 1298 5367 ahrens * nearly destroyed. 1299 5367 ahrens */ 1300 5367 ahrens err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1301 5367 ahrens if (err) 1302 5367 ahrens return (err); 1303 5367 ahrens if (count != 0) 1304 5367 ahrens return (EEXIST); 1305 1731 bonwick 1306 2199 ahrens return (0); 1307 2199 ahrens } 1308 2199 ahrens 1309 2199 ahrens /* ARGSUSED */ 1310 2199 ahrens static void 1311 4543 marks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1312 2199 ahrens { 1313 2199 ahrens dsl_dataset_t *ds = arg1; 1314 4543 marks dsl_pool_t *dp = ds->ds_dir->dd_pool; 1315 2199 ahrens 1316 1731 bonwick /* Mark it as inconsistent on-disk, in case we crash */ 1317 1731 bonwick dmu_buf_will_dirty(ds->ds_dbuf, tx); 1318 2082 eschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1319 4543 marks 1320 4543 marks spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1321 4543 marks cr, "dataset = %llu", ds->ds_object); 1322 789 ahrens } 1323 789 ahrens 1324 10242 chris static int 1325 10242 chris dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, 1326 10242 chris dmu_tx_t *tx) 1327 10242 chris { 1328 10242 chris dsl_dataset_t *ds = dsda->ds; 1329 10242 chris dsl_dataset_t *ds_prev = ds->ds_prev; 1330 10242 chris 1331 10242 chris if (dsl_dataset_might_destroy_origin(ds_prev)) { 1332 10242 chris struct dsl_ds_destroyarg ndsda = {0}; 1333 10242 chris 1334 10242 chris /* 1335 10242 chris * If we're not prepared to remove the 
origin, don't remove 1336 10242 chris * the clone either. 1337 10242 chris */ 1338 10242 chris if (dsda->rm_origin == NULL) { 1339 10242 chris dsda->need_prep = B_TRUE; 1340 10242 chris return (EBUSY); 1341 10242 chris } 1342 10242 chris 1343 10242 chris ndsda.ds = ds_prev; 1344 10242 chris ndsda.is_origin_rm = B_TRUE; 1345 10242 chris return (dsl_dataset_destroy_check(&ndsda, tag, tx)); 1346 10242 chris } 1347 10242 chris 1348 10242 chris /* 1349 10242 chris * If we're not going to remove the origin after all, 1350 10242 chris * undo the open context setup. 1351 10242 chris */ 1352 10242 chris if (dsda->rm_origin != NULL) { 1353 10242 chris dsl_dataset_disown(dsda->rm_origin, tag); 1354 10242 chris dsda->rm_origin = NULL; 1355 10242 chris } 1356 10242 chris 1357 10242 chris return (0); 1358 10242 chris } 1359 10242 chris 1360 2199 ahrens /* ARGSUSED */ 1361 5367 ahrens int 1362 2199 ahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1363 789 ahrens { 1364 10242 chris struct dsl_ds_destroyarg *dsda = arg1; 1365 10242 chris dsl_dataset_t *ds = dsda->ds; 1366 789 ahrens 1367 6689 maybee /* we have an owner hold, so noone else can destroy us */ 1368 6689 maybee ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 1369 6689 maybee 1370 10242 chris /* 1371 10242 chris * Only allow deferred destroy on pools that support it. 1372 10242 chris * NOTE: deferred destroy is only supported on snapshots. 1373 10242 chris */ 1374 10242 chris if (dsda->defer) { 1375 10242 chris if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 1376 10242 chris SPA_VERSION_USERREFS) 1377 10242 chris return (ENOTSUP); 1378 10242 chris ASSERT(dsl_dataset_is_snapshot(ds)); 1379 10242 chris return (0); 1380 10242 chris } 1381 789 ahrens 1382 789 ahrens /* 1383 789 ahrens * Can't delete a head dataset if there are snapshots of it. 1384 789 ahrens * (Except if the only snapshots are from the branch we cloned 1385 789 ahrens * from.) 
1386 789 ahrens */ 1387 789 ahrens if (ds->ds_prev != NULL && 1388 2199 ahrens ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1389 10816 Vitezslav return (EBUSY); 1390 789 ahrens 1391 789 ahrens /* 1392 789 ahrens * If we made changes this txg, traverse_dsl_dataset won't find 1393 789 ahrens * them. Try again. 1394 789 ahrens */ 1395 2199 ahrens if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1396 789 ahrens return (EAGAIN); 1397 10242 chris 1398 10242 chris if (dsl_dataset_is_snapshot(ds)) { 1399 10242 chris /* 1400 10242 chris * If this snapshot has an elevated user reference count, 1401 10242 chris * we can't destroy it yet. 1402 10242 chris */ 1403 10242 chris if (ds->ds_userrefs > 0 && !dsda->releasing) 1404 10242 chris return (EBUSY); 1405 10242 chris 1406 10242 chris mutex_enter(&ds->ds_lock); 1407 10242 chris /* 1408 10242 chris * Can't delete a branch point. However, if we're destroying 1409 10242 chris * a clone and removing its origin due to it having a user 1410 10242 chris * hold count of 0 and having been marked for deferred destroy, 1411 10242 chris * it's OK for the origin to have a single clone. 1412 10242 chris */ 1413 10242 chris if (ds->ds_phys->ds_num_children > 1414 10242 chris (dsda->is_origin_rm ? 2 : 1)) { 1415 10242 chris mutex_exit(&ds->ds_lock); 1416 10242 chris return (EEXIST); 1417 10242 chris } 1418 10242 chris mutex_exit(&ds->ds_lock); 1419 10242 chris } else if (dsl_dir_is_clone(ds->ds_dir)) { 1420 10242 chris return (dsl_dataset_origin_check(dsda, arg2, tx)); 1421 10242 chris } 1422 2199 ahrens 1423 2199 ahrens /* XXX we should do some i/o error checking... 
*/ 1424 2199 ahrens return (0); 1425 2199 ahrens } 1426 2199 ahrens 1427 6689 maybee struct refsarg { 1428 6689 maybee kmutex_t lock; 1429 6689 maybee boolean_t gone; 1430 6689 maybee kcondvar_t cv; 1431 6689 maybee }; 1432 6689 maybee 1433 6689 maybee /* ARGSUSED */ 1434 6689 maybee static void 1435 6689 maybee dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) 1436 6689 maybee { 1437 6689 maybee struct refsarg *arg = argv; 1438 6689 maybee 1439 6689 maybee mutex_enter(&arg->lock); 1440 6689 maybee arg->gone = TRUE; 1441 6689 maybee cv_signal(&arg->cv); 1442 6689 maybee mutex_exit(&arg->lock); 1443 6689 maybee } 1444 6689 maybee 1445 6689 maybee static void 1446 6689 maybee dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) 1447 6689 maybee { 1448 6689 maybee struct refsarg arg; 1449 6689 maybee 1450 6689 maybee mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); 1451 6689 maybee cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); 1452 6689 maybee arg.gone = FALSE; 1453 6689 maybee (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, 1454 6689 maybee dsl_dataset_refs_gone); 1455 6689 maybee dmu_buf_rele(ds->ds_dbuf, tag); 1456 6689 maybee mutex_enter(&arg.lock); 1457 6689 maybee while (!arg.gone) 1458 6689 maybee cv_wait(&arg.cv, &arg.lock); 1459 6689 maybee ASSERT(arg.gone); 1460 6689 maybee mutex_exit(&arg.lock); 1461 6689 maybee ds->ds_dbuf = NULL; 1462 6689 maybee ds->ds_phys = NULL; 1463 6689 maybee mutex_destroy(&arg.lock); 1464 6689 maybee cv_destroy(&arg.cv); 1465 6689 maybee } 1466 6689 maybee 1467 10801 Matthew static void 1468 10801 Matthew remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) 1469 10801 Matthew { 1470 10801 Matthew objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1471 10801 Matthew uint64_t count; 1472 10801 Matthew int err; 1473 10801 Matthew 1474 10801 Matthew ASSERT(ds->ds_phys->ds_num_children >= 2); 1475 10801 Matthew err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 1476 10801 Matthew /* 
1477 10801 Matthew * The err should not be ENOENT, but a bug in a previous version 1478 10801 Matthew * of the code could cause upgrade_clones_cb() to not set 1479 10801 Matthew * ds_next_snap_obj when it should, leading to a missing entry. 1480 10801 Matthew * If we knew that the pool was created after 1481 10801 Matthew * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 1482 10801 Matthew * ENOENT. However, at least we can check that we don't have 1483 10801 Matthew * too many entries in the next_clones_obj even after failing to 1484 10801 Matthew * remove this one. 1485 10801 Matthew */ 1486 10801 Matthew if (err != ENOENT) { 1487 10801 Matthew VERIFY3U(err, ==, 0); 1488 10801 Matthew } 1489 10801 Matthew ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1490 10801 Matthew &count)); 1491 10801 Matthew ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 1492 10801 Matthew } 1493 10801 Matthew 1494 5367 ahrens void 1495 4543 marks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1496 2199 ahrens { 1497 10242 chris struct dsl_ds_destroyarg *dsda = arg1; 1498 10242 chris dsl_dataset_t *ds = dsda->ds; 1499 2199 ahrens int err; 1500 2199 ahrens int after_branch_point = FALSE; 1501 2199 ahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 1502 2199 ahrens objset_t *mos = dp->dp_meta_objset; 1503 2199 ahrens dsl_dataset_t *ds_prev = NULL; 1504 2199 ahrens uint64_t obj; 1505 2199 ahrens 1506 6689 maybee ASSERT(ds->ds_owner); 1507 10242 chris ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); 1508 2199 ahrens ASSERT(ds->ds_prev == NULL || 1509 2199 ahrens ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1510 2199 ahrens ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1511 10242 chris 1512 10242 chris if (dsda->defer) { 1513 10242 chris ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1514 10242 chris if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) { 1515 10242 chris 
dmu_buf_will_dirty(ds->ds_dbuf, tx); 1516 10242 chris ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; 1517 10242 chris return; 1518 10242 chris } 1519 10242 chris } 1520 6689 maybee 1521 6689 maybee /* signal any waiters that this dataset is going away */ 1522 6689 maybee mutex_enter(&ds->ds_lock); 1523 6689 maybee ds->ds_owner = dsl_reaper; 1524 6689 maybee cv_broadcast(&ds->ds_exclusive_cv); 1525 6689 maybee mutex_exit(&ds->ds_lock); 1526 5378 ck153898 1527 5378 ck153898 /* Remove our reservation */ 1528 5378 ck153898 if (ds->ds_reserved != 0) { 1529 11022 Tom dsl_prop_setarg_t psa; 1530 11022 Tom uint64_t value = 0; 1531 11022 Tom 1532 11022 Tom dsl_prop_setarg_init_uint64(&psa, "refreservation", 1533 11022 Tom (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 1534 11022 Tom &value); 1535 11022 Tom psa.psa_effective_value = 0; /* predict default value */ 1536 11022 Tom 1537 11022 Tom dsl_dataset_set_reservation_sync(ds, &psa, cr, tx); 1538 5378 ck153898 ASSERT3U(ds->ds_reserved, ==, 0); 1539 5378 ck153898 } 1540 2199 ahrens 1541 2199 ahrens ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1542 2199 ahrens 1543 7046 ahrens dsl_pool_ds_destroyed(ds, tx); 1544 7046 ahrens 1545 2199 ahrens obj = ds->ds_object; 1546 789 ahrens 1547 789 ahrens if (ds->ds_phys->ds_prev_snap_obj != 0) { 1548 789 ahrens if (ds->ds_prev) { 1549 789 ahrens ds_prev = ds->ds_prev; 1550 789 ahrens } else { 1551 6689 maybee VERIFY(0 == dsl_dataset_hold_obj(dp, 1552 6689 maybee ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); 1553 789 ahrens } 1554 789 ahrens after_branch_point = 1555 789 ahrens (ds_prev->ds_phys->ds_next_snap_obj != obj); 1556 789 ahrens 1557 789 ahrens dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1558 7046 ahrens if (after_branch_point && 1559 7046 ahrens ds_prev->ds_phys->ds_next_clones_obj != 0) { 1560 10801 Matthew remove_from_next_clones(ds_prev, obj, tx); 1561 7046 ahrens if (ds->ds_phys->ds_next_snap_obj != 0) { 1562 7046 ahrens VERIFY(0 == zap_add_int(mos, 1563 7046 
ahrens ds_prev->ds_phys->ds_next_clones_obj, 1564 7046 ahrens ds->ds_phys->ds_next_snap_obj, tx)); 1565 7046 ahrens } 1566 7046 ahrens } 1567 789 ahrens if (after_branch_point && 1568 789 ahrens ds->ds_phys->ds_next_snap_obj == 0) { 1569 789 ahrens /* This clone is toast. */ 1570 789 ahrens ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1571 789 ahrens ds_prev->ds_phys->ds_num_children--; 1572 10242 chris 1573 10242 chris /* 1574 10242 chris * If the clone's origin has no other clones, no 1575 10242 chris * user holds, and has been marked for deferred 1576 10242 chris * deletion, then we should have done the necessary 1577 10242 chris * destroy setup for it. 1578 10242 chris */ 1579 10242 chris if (ds_prev->ds_phys->ds_num_children == 1 && 1580 10242 chris ds_prev->ds_userrefs == 0 && 1581 10242 chris DS_IS_DEFER_DESTROY(ds_prev)) { 1582 10242 chris ASSERT3P(dsda->rm_origin, !=, NULL); 1583 10242 chris } else { 1584 10242 chris ASSERT3P(dsda->rm_origin, ==, NULL); 1585 10242 chris } 1586 789 ahrens } else if (!after_branch_point) { 1587 789 ahrens ds_prev->ds_phys->ds_next_snap_obj = 1588 789 ahrens ds->ds_phys->ds_next_snap_obj; 1589 789 ahrens } 1590 789 ahrens } 1591 789 ahrens 1592 789 ahrens if (ds->ds_phys->ds_next_snap_obj != 0) { 1593 2199 ahrens blkptr_t bp; 1594 789 ahrens dsl_dataset_t *ds_next; 1595 789 ahrens uint64_t itor = 0; 1596 5378 ck153898 uint64_t old_unique; 1597 7390 Matthew int64_t used = 0, compressed = 0, uncompressed = 0; 1598 789 ahrens 1599 6689 maybee VERIFY(0 == dsl_dataset_hold_obj(dp, 1600 6689 maybee ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); 1601 789 ahrens ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1602 5378 ck153898 1603 5378 ck153898 old_unique = dsl_dataset_unique(ds_next); 1604 789 ahrens 1605 789 ahrens dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1606 789 ahrens ds_next->ds_phys->ds_prev_snap_obj = 1607 789 ahrens ds->ds_phys->ds_prev_snap_obj; 1608 789 ahrens ds_next->ds_phys->ds_prev_snap_txg = 1609 789 
ahrens ds->ds_phys->ds_prev_snap_txg; 1610 789 ahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1611 789 ahrens ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1612 789 ahrens 1613 789 ahrens /* 1614 789 ahrens * Transfer to our deadlist (which will become next's 1615 789 ahrens * new deadlist) any entries from next's current 1616 789 ahrens * deadlist which were born before prev, and free the 1617 789 ahrens * other entries. 1618 789 ahrens * 1619 789 ahrens * XXX we're doing this long task with the config lock held 1620 789 ahrens */ 1621 6689 maybee while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) { 1622 789 ahrens if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1623 1544 eschrock VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1624 1544 eschrock &bp, tx)); 1625 789 ahrens if (ds_prev && !after_branch_point && 1626 789 ahrens bp.blk_birth > 1627 789 ahrens ds_prev->ds_phys->ds_prev_snap_txg) { 1628 789 ahrens ds_prev->ds_phys->ds_unique_bytes += 1629 10922 Jeff bp_get_dsize_sync(dp->dp_spa, &bp); 1630 789 ahrens } 1631 789 ahrens } else { 1632 10922 Jeff used += bp_get_dsize_sync(dp->dp_spa, &bp); 1633 789 ahrens compressed += BP_GET_PSIZE(&bp); 1634 789 ahrens uncompressed += BP_GET_UCSIZE(&bp); 1635 10922 Jeff dsl_free(dp, tx->tx_txg, &bp); 1636 789 ahrens } 1637 789 ahrens } 1638 789 ahrens 1639 7390 Matthew ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1640 7390 Matthew 1641 7390 Matthew /* change snapused */ 1642 7390 Matthew dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1643 7390 Matthew -used, -compressed, -uncompressed, tx); 1644 7390 Matthew 1645 789 ahrens /* free next's deadlist */ 1646 789 ahrens bplist_close(&ds_next->ds_deadlist); 1647 789 ahrens bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1648 789 ahrens 1649 789 ahrens /* set next's deadlist to our deadlist */ 1650 6689 maybee bplist_close(&ds->ds_deadlist); 1651 789 ahrens ds_next->ds_phys->ds_deadlist_obj = 1652 789 ahrens ds->ds_phys->ds_deadlist_obj; 
1653 1544 eschrock VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1654 1544 eschrock ds_next->ds_phys->ds_deadlist_obj)); 1655 789 ahrens ds->ds_phys->ds_deadlist_obj = 0; 1656 789 ahrens 1657 789 ahrens if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1658 789 ahrens /* 1659 789 ahrens * Update next's unique to include blocks which 1660 789 ahrens * were previously shared by only this snapshot 1661 789 ahrens * and it. Those blocks will be born after the 1662 789 ahrens * prev snap and before this snap, and will have 1663 789 ahrens * died after the next snap and before the one 1664 789 ahrens * after that (ie. be on the snap after next's 1665 789 ahrens * deadlist). 1666 789 ahrens * 1667 789 ahrens * XXX we're doing this long task with the 1668 789 ahrens * config lock held 1669 789 ahrens */ 1670 789 ahrens dsl_dataset_t *ds_after_next; 1671 7390 Matthew uint64_t space; 1672 789 ahrens 1673 6689 maybee VERIFY(0 == dsl_dataset_hold_obj(dp, 1674 6689 maybee ds_next->ds_phys->ds_next_snap_obj, 1675 6689 maybee FTAG, &ds_after_next)); 1676 7390 Matthew 1677 7390 Matthew VERIFY(0 == 1678 7390 Matthew bplist_space_birthrange(&ds_after_next->ds_deadlist, 1679 7390 Matthew ds->ds_phys->ds_prev_snap_txg, 1680 7390 Matthew ds->ds_phys->ds_creation_txg, &space)); 1681 7390 Matthew ds_next->ds_phys->ds_unique_bytes += space; 1682 789 ahrens 1683 6689 maybee dsl_dataset_rele(ds_after_next, FTAG); 1684 789 ahrens ASSERT3P(ds_next->ds_prev, ==, NULL); 1685 789 ahrens } else { 1686 789 ahrens ASSERT3P(ds_next->ds_prev, ==, ds); 1687 6689 maybee dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); 1688 6689 maybee ds_next->ds_prev = NULL; 1689 789 ahrens if (ds_prev) { 1690 6689 maybee VERIFY(0 == dsl_dataset_get_ref(dp, 1691 6689 maybee ds->ds_phys->ds_prev_snap_obj, 1692 6689 maybee ds_next, &ds_next->ds_prev)); 1693 789 ahrens } 1694 5378 ck153898 1695 5378 ck153898 dsl_dataset_recalc_head_uniq(ds_next); 1696 5378 ck153898 1697 5378 ck153898 /* 1698 5378 ck153898 * Reduce the 
amount of our unconsmed refreservation 1699 5378 ck153898 * being charged to our parent by the amount of 1700 5378 ck153898 * new unique data we have gained. 1701 5378 ck153898 */ 1702 5378 ck153898 if (old_unique < ds_next->ds_reserved) { 1703 5378 ck153898 int64_t mrsdelta; 1704 5378 ck153898 uint64_t new_unique = 1705 5378 ck153898 ds_next->ds_phys->ds_unique_bytes; 1706 5378 ck153898 1707 5378 ck153898 ASSERT(old_unique <= new_unique); 1708 5378 ck153898 mrsdelta = MIN(new_unique - old_unique, 1709 5378 ck153898 ds_next->ds_reserved - old_unique); 1710 7390 Matthew dsl_dir_diduse_space(ds->ds_dir, 1711 7390 Matthew DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 1712 5378 ck153898 } 1713 789 ahrens } 1714 6689 maybee dsl_dataset_rele(ds_next, FTAG); 1715 789 ahrens } else { 1716 789 ahrens /* 1717 789 ahrens * There's no next snapshot, so this is a head dataset. 1718 789 ahrens * Destroy the deadlist. Unless it's a clone, the 1719 789 ahrens * deadlist should be empty. (If it's a clone, it's 1720 789 ahrens * safe to ignore the deadlist contents.) 1721 789 ahrens */ 1722 789 ahrens struct killarg ka; 1723 789 ahrens 1724 789 ahrens ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1725 789 ahrens bplist_close(&ds->ds_deadlist); 1726 789 ahrens bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1727 789 ahrens ds->ds_phys->ds_deadlist_obj = 0; 1728 789 ahrens 1729 789 ahrens /* 1730 789 ahrens * Free everything that we point to (that's born after 1731 789 ahrens * the previous snapshot, if we are a clone) 1732 789 ahrens * 1733 7390 Matthew * NB: this should be very quick, because we already 1734 7390 Matthew * freed all the objects in open context. 
1735 789 ahrens */ 1736 7390 Matthew ka.ds = ds; 1737 789 ahrens ka.tx = tx; 1738 7837 Matthew err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1739 7837 Matthew TRAVERSE_POST, kill_blkptr, &ka); 1740 789 ahrens ASSERT3U(err, ==, 0); 1741 9390 chris ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 1742 7390 Matthew ds->ds_phys->ds_unique_bytes == 0); 1743 10342 chris 1744 10342 chris if (ds->ds_prev != NULL) { 1745 10342 chris dsl_dataset_rele(ds->ds_prev, ds); 1746 10342 chris ds->ds_prev = ds_prev = NULL; 1747 10342 chris } 1748 789 ahrens } 1749 789 ahrens 1750 6689 maybee if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1751 6689 maybee /* Erase the link in the dir */ 1752 6689 maybee dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1753 6689 maybee ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1754 6689 maybee ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 1755 789 ahrens err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1756 789 ahrens ASSERT(err == 0); 1757 789 ahrens } else { 1758 789 ahrens /* remove from snapshot namespace */ 1759 789 ahrens dsl_dataset_t *ds_head; 1760 6689 maybee ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 1761 6689 maybee VERIFY(0 == dsl_dataset_hold_obj(dp, 1762 6689 maybee ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 1763 2207 ahrens VERIFY(0 == dsl_dataset_get_snapname(ds)); 1764 789 ahrens #ifdef ZFS_DEBUG 1765 789 ahrens { 1766 789 ahrens uint64_t val; 1767 6492 timh 1768 6689 maybee err = dsl_dataset_snap_lookup(ds_head, 1769 6492 timh ds->ds_snapname, &val); 1770 789 ahrens ASSERT3U(err, ==, 0); 1771 789 ahrens ASSERT3U(val, ==, obj); 1772 789 ahrens } 1773 789 ahrens #endif 1774 6689 maybee err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); 1775 789 ahrens ASSERT(err == 0); 1776 6689 maybee dsl_dataset_rele(ds_head, FTAG); 1777 789 ahrens } 1778 789 ahrens 1779 789 ahrens if (ds_prev && ds->ds_prev != ds_prev) 1780 6689 maybee dsl_dataset_rele(ds_prev, FTAG); 1781 789 ahrens 1782 
5094 lling spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1783 4543 marks spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1784 4543 marks cr, "dataset = %llu", ds->ds_object); 1785 4543 marks 1786 7046 ahrens if (ds->ds_phys->ds_next_clones_obj != 0) { 1787 7046 ahrens uint64_t count; 1788 7046 ahrens ASSERT(0 == zap_count(mos, 1789 7046 ahrens ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 1790 7046 ahrens VERIFY(0 == dmu_object_free(mos, 1791 7046 ahrens ds->ds_phys->ds_next_clones_obj, tx)); 1792 7265 ahrens } 1793 7390 Matthew if (ds->ds_phys->ds_props_obj != 0) 1794 7390 Matthew VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 1795 10242 chris if (ds->ds_phys->ds_userrefs_obj != 0) 1796 10242 chris VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 1797 6689 maybee dsl_dir_close(ds->ds_dir, ds); 1798 6689 maybee ds->ds_dir = NULL; 1799 6689 maybee dsl_dataset_drain_refs(ds, tag); 1800 2199 ahrens VERIFY(0 == dmu_object_free(mos, obj, tx)); 1801 10242 chris 1802 10242 chris if (dsda->rm_origin) { 1803 10242 chris /* 1804 10242 chris * Remove the origin of the clone we just destroyed. 1805 10242 chris */ 1806 10242 chris struct dsl_ds_destroyarg ndsda = {0}; 1807 10242 chris 1808 10342 chris ndsda.ds = dsda->rm_origin; 1809 10242 chris dsl_dataset_destroy_sync(&ndsda, tag, cr, tx); 1810 10242 chris } 1811 2199 ahrens } 1812 2199 ahrens 1813 5378 ck153898 static int 1814 5378 ck153898 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1815 5378 ck153898 { 1816 5378 ck153898 uint64_t asize; 1817 5378 ck153898 1818 5378 ck153898 if (!dmu_tx_is_syncing(tx)) 1819 5378 ck153898 return (0); 1820 5378 ck153898 1821 5378 ck153898 /* 1822 5378 ck153898 * If there's an fs-only reservation, any blocks that might become 1823 5378 ck153898 * owned by the snapshot dataset must be accommodated by space 1824 5378 ck153898 * outside of the reservation. 
1825 5378 ck153898 */ 1826 5378 ck153898 asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1827 5378 ck153898 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1828 5378 ck153898 return (ENOSPC); 1829 5378 ck153898 1830 5378 ck153898 /* 1831 5378 ck153898 * Propogate any reserved space for this snapshot to other 1832 5378 ck153898 * snapshot checks in this sync group. 1833 5378 ck153898 */ 1834 5378 ck153898 if (asize > 0) 1835 5378 ck153898 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1836 5378 ck153898 1837 5378 ck153898 return (0); 1838 5378 ck153898 } 1839 5378 ck153898 1840 2199 ahrens /* ARGSUSED */ 1841 2199 ahrens int 1842 2199 ahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1843 2199 ahrens { 1844 5367 ahrens dsl_dataset_t *ds = arg1; 1845 2199 ahrens const char *snapname = arg2; 1846 2199 ahrens int err; 1847 2199 ahrens uint64_t value; 1848 789 ahrens 1849 789 ahrens /* 1850 2199 ahrens * We don't allow multiple snapshots of the same txg. If there 1851 2199 ahrens * is already one, try again. 1852 789 ahrens */ 1853 2199 ahrens if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1854 2199 ahrens return (EAGAIN); 1855 789 ahrens 1856 2199 ahrens /* 1857 2199 ahrens * Check for conflicting name snapshot name. 1858 2199 ahrens */ 1859 6689 maybee err = dsl_dataset_snap_lookup(ds, snapname, &value); 1860 2199 ahrens if (err == 0) 1861 2199 ahrens return (EEXIST); 1862 2199 ahrens if (err != ENOENT) 1863 2199 ahrens return (err); 1864 3978 mmusante 1865 3978 mmusante /* 1866 3978 mmusante * Check that the dataset's name is not too long. 
Name consists 1867 3978 mmusante * of the dataset's length + 1 for the @-sign + snapshot name's length 1868 3978 mmusante */ 1869 3978 mmusante if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1870 3978 mmusante return (ENAMETOOLONG); 1871 5378 ck153898 1872 5378 ck153898 err = dsl_dataset_snapshot_reserve_space(ds, tx); 1873 5378 ck153898 if (err) 1874 5378 ck153898 return (err); 1875 2199 ahrens 1876 2199 ahrens ds->ds_trysnap_txg = tx->tx_txg; 1877 789 ahrens return (0); 1878 789 ahrens } 1879 789 ahrens 1880 2199 ahrens void 1881 4543 marks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1882 789 ahrens { 1883 5367 ahrens dsl_dataset_t *ds = arg1; 1884 2199 ahrens const char *snapname = arg2; 1885 2199 ahrens dsl_pool_t *dp = ds->ds_dir->dd_pool; 1886 789 ahrens dmu_buf_t *dbuf; 1887 789 ahrens dsl_dataset_phys_t *dsphys; 1888 7046 ahrens uint64_t dsobj, crtxg; 1889 789 ahrens objset_t *mos = dp->dp_meta_objset; 1890 789 ahrens int err; 1891 789 ahrens 1892 2199 ahrens ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1893 7046 ahrens 1894 7046 ahrens /* 1895 7046 ahrens * The origin's ds_creation_txg has to be < TXG_INITIAL 1896 7046 ahrens */ 1897 7046 ahrens if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1898 7046 ahrens crtxg = 1; 1899 7046 ahrens else 1900 7046 ahrens crtxg = tx->tx_txg; 1901 789 ahrens 1902 928 tabriz dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1903 928 tabriz DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1904 1544 eschrock VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1905 789 ahrens dmu_buf_will_dirty(dbuf, tx); 1906 789 ahrens dsphys = dbuf->db_data; 1907 6689 maybee bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1908 2199 ahrens dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1909 789 ahrens dsphys->ds_fsid_guid = unique_create(); 1910 789 ahrens (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1911 789 ahrens sizeof (dsphys->ds_guid)); 1912 789 ahrens 
dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1913 789 ahrens dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1914 789 ahrens dsphys->ds_next_snap_obj = ds->ds_object; 1915 789 ahrens dsphys->ds_num_children = 1; 1916 789 ahrens dsphys->ds_creation_time = gethrestime_sec(); 1917 7046 ahrens dsphys->ds_creation_txg = crtxg; 1918 789 ahrens dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1919 789 ahrens dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1920 789 ahrens dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1921 789 ahrens dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1922 2082 eschrock dsphys->ds_flags = ds->ds_phys->ds_flags; 1923 789 ahrens dsphys->ds_bp = ds->ds_phys->ds_bp; 1924 1544 eschrock dmu_buf_rele(dbuf, FTAG); 1925 789 ahrens 1926 2199 ahrens ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1927 2199 ahrens if (ds->ds_prev) { 1928 7046 ahrens uint64_t next_clones_obj = 1929 7046 ahrens ds->ds_prev->ds_phys->ds_next_clones_obj; 1930 2199 ahrens ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1931 789 ahrens ds->ds_object || 1932 2199 ahrens ds->ds_prev->ds_phys->ds_num_children > 1); 1933 2199 ahrens if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1934 2199 ahrens dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1935 789 ahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1936 2199 ahrens ds->ds_prev->ds_phys->ds_creation_txg); 1937 2199 ahrens ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1938 7046 ahrens } else if (next_clones_obj != 0) { 1939 10801 Matthew remove_from_next_clones(ds->ds_prev, 1940 10801 Matthew dsphys->ds_next_snap_obj, tx); 1941 7046 ahrens VERIFY3U(0, ==, zap_add_int(mos, 1942 7046 ahrens next_clones_obj, dsobj, tx)); 1943 789 ahrens } 1944 789 ahrens } 1945 789 ahrens 1946 5378 ck153898 /* 1947 5378 ck153898 * If we have a reference-reservation on this dataset, we will 1948 5378 ck153898 * need to increase the amount of 
refreservation being charged 1949 5378 ck153898 * since our unique space is going to zero. 1950 5378 ck153898 */ 1951 5378 ck153898 if (ds->ds_reserved) { 1952 5378 ck153898 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1953 7390 Matthew dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1954 7390 Matthew add, 0, 0, tx); 1955 5378 ck153898 } 1956 5378 ck153898 1957 789 ahrens bplist_close(&ds->ds_deadlist); 1958 789 ahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 1959 5712 ahrens ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1960 789 ahrens ds->ds_phys->ds_prev_snap_obj = dsobj; 1961 7046 ahrens ds->ds_phys->ds_prev_snap_txg = crtxg; 1962 789 ahrens ds->ds_phys->ds_unique_bytes = 0; 1963 5378 ck153898 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1964 5378 ck153898 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1965 789 ahrens ds->ds_phys->ds_deadlist_obj = 1966 789 ahrens bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1967 1544 eschrock VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1968 1544 eschrock ds->ds_phys->ds_deadlist_obj)); 1969 789 ahrens 1970 789 ahrens dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1971 789 ahrens err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1972 789 ahrens snapname, 8, 1, &dsobj, tx); 1973 789 ahrens ASSERT(err == 0); 1974 789 ahrens 1975 789 ahrens if (ds->ds_prev) 1976 6689 maybee dsl_dataset_drop_ref(ds->ds_prev, ds); 1977 6689 maybee VERIFY(0 == dsl_dataset_get_ref(dp, 1978 6689 maybee ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1979 7046 ahrens 1980 7046 ahrens dsl_pool_ds_snapshotted(ds, tx); 1981 10373 chris 1982 10373 chris dsl_dir_snap_cmtime_update(ds->ds_dir); 1983 4543 marks 1984 4543 marks spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1985 4603 ahrens "dataset = %llu", dsobj); 1986 789 ahrens } 1987 789 ahrens 1988 789 ahrens void 1989 3547 maybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1990 789 ahrens { 1991 789 ahrens 
ASSERT(dmu_tx_is_syncing(tx)); 1992 10298 Matthew ASSERT(ds->ds_objset != NULL); 1993 789 ahrens ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1994 789 ahrens 1995 4787 ahrens /* 1996 4787 ahrens * in case we had to change ds_fsid_guid when we opened it, 1997 4787 ahrens * sync it out now. 1998 4787 ahrens */ 1999 4787 ahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 2000 4787 ahrens ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 2001 4787 ahrens 2002 789 ahrens dsl_dir_dirty(ds->ds_dir, tx); 2003 10298 Matthew dmu_objset_sync(ds->ds_objset, zio, tx); 2004 789 ahrens } 2005 789 ahrens 2006 789 ahrens void 2007 2885 ahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 2008 789 ahrens { 2009 5378 ck153898 uint64_t refd, avail, uobjs, aobjs; 2010 5378 ck153898 2011 2885 ahrens dsl_dir_stats(ds->ds_dir, nv); 2012 5378 ck153898 2013 5378 ck153898 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 2014 5378 ck153898 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 2015 5378 ck153898 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 2016 789 ahrens 2017 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2018 2885 ahrens ds->ds_phys->ds_creation_time); 2019 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2020 2885 ahrens ds->ds_phys->ds_creation_txg); 2021 5378 ck153898 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2022 5378 ck153898 ds->ds_quota); 2023 5378 ck153898 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2024 5378 ck153898 ds->ds_reserved); 2025 6643 eschrock dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2026 6643 eschrock ds->ds_phys->ds_guid); 2027 10575 Eric dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 2028 10575 Eric dsl_dataset_unique(ds)); 2029 10575 Eric dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 2030 10575 Eric ds->ds_object); 2031 11022 Tom dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 2032 11022 Tom ds->ds_userrefs); 2033 10242 chris dsl_prop_nvlist_add_uint64(nv, 
ZFS_PROP_DEFER_DESTROY, 2034 10242 chris DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2035 789 ahrens 2036 789 ahrens if (ds->ds_phys->ds_next_snap_obj) { 2037 789 ahrens /* 2038 789 ahrens * This is a snapshot; override the dd's space used with 2039 2885 ahrens * our unique space and compression ratio. 2040 789 ahrens */ 2041 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2042 2885 ahrens ds->ds_phys->ds_unique_bytes); 2043 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 2044 2885 ahrens ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 2045 2885 ahrens (ds->ds_phys->ds_uncompressed_bytes * 100 / 2046 2885 ahrens ds->ds_phys->ds_compressed_bytes)); 2047 789 ahrens } 2048 789 ahrens } 2049 789 ahrens 2050 2885 ahrens void 2051 2885 ahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2052 789 ahrens { 2053 2885 ahrens stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 2054 2885 ahrens stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 2055 5367 ahrens stat->dds_guid = ds->ds_phys->ds_guid; 2056 2885 ahrens if (ds->ds_phys->ds_next_snap_obj) { 2057 2885 ahrens stat->dds_is_snapshot = B_TRUE; 2058 2885 ahrens stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 2059 8228 Eric } else { 2060 8228 Eric stat->dds_is_snapshot = B_FALSE; 2061 8228 Eric stat->dds_num_clones = 0; 2062 2885 ahrens } 2063 2885 ahrens 2064 2885 ahrens /* clone origin is really a dsl_dir thing... 
*/ 2065 5446 ahrens rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2066 7046 ahrens if (dsl_dir_is_clone(ds->ds_dir)) { 2067 2885 ahrens dsl_dataset_t *ods; 2068 2885 ahrens 2069 6689 maybee VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 2070 6689 maybee ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 2071 5367 ahrens dsl_dataset_name(ods, stat->dds_origin); 2072 6689 maybee dsl_dataset_drop_ref(ods, FTAG); 2073 8228 Eric } else { 2074 8228 Eric stat->dds_origin[0] = '\0'; 2075 2885 ahrens } 2076 5446 ahrens rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2077 2885 ahrens } 2078 2885 ahrens 2079 2885 ahrens uint64_t 2080 2885 ahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2081 2885 ahrens { 2082 4787 ahrens return (ds->ds_fsid_guid); 2083 2885 ahrens } 2084 2885 ahrens 2085 2885 ahrens void 2086 2885 ahrens dsl_dataset_space(dsl_dataset_t *ds, 2087 2885 ahrens uint64_t *refdbytesp, uint64_t *availbytesp, 2088 2885 ahrens uint64_t *usedobjsp, uint64_t *availobjsp) 2089 2885 ahrens { 2090 2885 ahrens *refdbytesp = ds->ds_phys->ds_used_bytes; 2091 2885 ahrens *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2092 5378 ck153898 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2093 5378 ck153898 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2094 5378 ck153898 if (ds->ds_quota != 0) { 2095 5378 ck153898 /* 2096 5378 ck153898 * Adjust available bytes according to refquota 2097 5378 ck153898 */ 2098 5378 ck153898 if (*refdbytesp < ds->ds_quota) 2099 5378 ck153898 *availbytesp = MIN(*availbytesp, 2100 5378 ck153898 ds->ds_quota - *refdbytesp); 2101 5378 ck153898 else 2102 5378 ck153898 *availbytesp = 0; 2103 5378 ck153898 } 2104 2885 ahrens *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2105 2885 ahrens *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2106 789 ahrens } 2107 789 ahrens 2108 5326 ek110237 boolean_t 2109 5326 ek110237 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2110 5326 ek110237 { 2111 5326 ek110237 
dsl_pool_t *dp = ds->ds_dir->dd_pool; 2112 5326 ek110237 2113 5326 ek110237 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2114 5326 ek110237 dsl_pool_sync_context(dp)); 2115 5326 ek110237 if (ds->ds_prev == NULL) 2116 5326 ek110237 return (B_FALSE); 2117 5326 ek110237 if (ds->ds_phys->ds_bp.blk_birth > 2118 5326 ek110237 ds->ds_prev->ds_phys->ds_creation_txg) 2119 5326 ek110237 return (B_TRUE); 2120 5326 ek110237 return (B_FALSE); 2121 5326 ek110237 } 2122 5326 ek110237 2123 2199 ahrens /* ARGSUSED */ 2124 2199 ahrens static int 2125 2199 ahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2126 2199 ahrens { 2127 2199 ahrens dsl_dataset_t *ds = arg1; 2128 2199 ahrens char *newsnapname = arg2; 2129 2199 ahrens dsl_dir_t *dd = ds->ds_dir; 2130 2199 ahrens dsl_dataset_t *hds; 2131 2199 ahrens uint64_t val; 2132 2199 ahrens int err; 2133 789 ahrens 2134 6689 maybee err = dsl_dataset_hold_obj(dd->dd_pool, 2135 6689 maybee dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2136 789 ahrens if (err) 2137 789 ahrens return (err); 2138 789 ahrens 2139 2199 ahrens /* new name better not be in use */ 2140 6689 maybee err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2141 6689 maybee dsl_dataset_rele(hds, FTAG); 2142 789 ahrens 2143 2199 ahrens if (err == 0) 2144 2199 ahrens err = EEXIST; 2145 2199 ahrens else if (err == ENOENT) 2146 2199 ahrens err = 0; 2147 4007 mmusante 2148 4007 mmusante /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2149 4007 mmusante if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2150 4007 mmusante err = ENAMETOOLONG; 2151 4007 mmusante 2152 2199 ahrens return (err); 2153 2199 ahrens } 2154 789 ahrens 2155 2199 ahrens static void 2156 4543 marks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2157 4543 marks cred_t *cr, dmu_tx_t *tx) 2158 2199 ahrens { 2159 2199 ahrens dsl_dataset_t *ds = arg1; 2160 4543 marks const char *newsnapname = arg2; 2161 2199 ahrens dsl_dir_t *dd 
= ds->ds_dir; 2162 2199 ahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 2163 2199 ahrens dsl_dataset_t *hds; 2164 2199 ahrens int err; 2165 789 ahrens 2166 2199 ahrens ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2167 789 ahrens 2168 6689 maybee VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2169 6689 maybee dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2170 789 ahrens 2171 2199 ahrens VERIFY(0 == dsl_dataset_get_snapname(ds)); 2172 6689 maybee err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2173 2199 ahrens ASSERT3U(err, ==, 0); 2174 2199 ahrens mutex_enter(&ds->ds_lock); 2175 2199 ahrens (void) strcpy(ds->ds_snapname, newsnapname); 2176 2199 ahrens mutex_exit(&ds->ds_lock); 2177 2199 ahrens err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2178 2199 ahrens ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2179 2199 ahrens ASSERT3U(err, ==, 0); 2180 789 ahrens 2181 4543 marks spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2182 4543 marks cr, "dataset = %llu", ds->ds_object); 2183 6689 maybee dsl_dataset_rele(hds, FTAG); 2184 789 ahrens } 2185 789 ahrens 2186 5326 ek110237 struct renamesnaparg { 2187 4007 mmusante dsl_sync_task_group_t *dstg; 2188 4007 mmusante char failed[MAXPATHLEN]; 2189 4007 mmusante char *oldsnap; 2190 4007 mmusante char *newsnap; 2191 4007 mmusante }; 2192 4007 mmusante 2193 4007 mmusante static int 2194 4007 mmusante dsl_snapshot_rename_one(char *name, void *arg) 2195 4007 mmusante { 2196 5326 ek110237 struct renamesnaparg *ra = arg; 2197 4007 mmusante dsl_dataset_t *ds = NULL; 2198 4007 mmusante char *cp; 2199 4007 mmusante int err; 2200 4007 mmusante 2201 4007 mmusante cp = name + strlen(name); 2202 4007 mmusante *cp = '@'; 2203 4007 mmusante (void) strcpy(cp + 1, ra->oldsnap); 2204 4543 marks 2205 4543 marks /* 2206 4543 marks * For recursive snapshot renames the parent won't be changing 2207 4543 marks * so we just pass name for both the to/from argument. 
2208 4543 marks */ 2209 7312 Matthew err = zfs_secpolicy_rename_perms(name, name, CRED()); 2210 7312 Matthew if (err == ENOENT) { 2211 7312 Matthew return (0); 2212 7312 Matthew } else if (err) { 2213 4543 marks (void) strcpy(ra->failed, name); 2214 4543 marks return (err); 2215 4543 marks } 2216 4543 marks 2217 6689 maybee #ifdef _KERNEL 2218 6689 maybee /* 2219 6689 maybee * For all filesystems undergoing rename, we'll need to unmount it. 2220 6689 maybee */ 2221 6689 maybee (void) zfs_unmount_snap(name, NULL); 2222 6689 maybee #endif 2223 6689 maybee err = dsl_dataset_hold(name, ra->dstg, &ds); 2224 6689 maybee *cp = '\0'; 2225 4007 mmusante if (err == ENOENT) { 2226 4007 mmusante return (0); 2227 6689 maybee } else if (err) { 2228 4007 mmusante (void) strcpy(ra->failed, name); 2229 4007 mmusante return (err); 2230 4007 mmusante } 2231 4007 mmusante 2232 4007 mmusante dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2233 4007 mmusante dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2234 4007 mmusante 2235 4007 mmusante return (0); 2236 4007 mmusante } 2237 4007 mmusante 2238 4007 mmusante static int 2239 4007 mmusante dsl_recursive_rename(char *oldname, const char *newname) 2240 4007 mmusante { 2241 4007 mmusante int err; 2242 5326 ek110237 struct renamesnaparg *ra; 2243 4007 mmusante dsl_sync_task_t *dst; 2244 4007 mmusante spa_t *spa; 2245 4007 mmusante char *cp, *fsname = spa_strdup(oldname); 2246 4007 mmusante int len = strlen(oldname); 2247 4007 mmusante 2248 4007 mmusante /* truncate the snapshot name to get the fsname */ 2249 4007 mmusante cp = strchr(fsname, '@'); 2250 4007 mmusante *cp = '\0'; 2251 4007 mmusante 2252 4603 ahrens err = spa_open(fsname, &spa, FTAG); 2253 4007 mmusante if (err) { 2254 4007 mmusante kmem_free(fsname, len + 1); 2255 4007 mmusante return (err); 2256 4007 mmusante } 2257 5326 ek110237 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2258 4007 mmusante ra->dstg = 
dsl_sync_task_group_create(spa_get_dsl(spa)); 2259 4007 mmusante 2260 4007 mmusante ra->oldsnap = strchr(oldname, '@') + 1; 2261 4007 mmusante ra->newsnap = strchr(newname, '@') + 1; 2262 4007 mmusante *ra->failed = '\0'; 2263 4007 mmusante 2264 4007 mmusante err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2265 4007 mmusante DS_FIND_CHILDREN); 2266 4007 mmusante kmem_free(fsname, len + 1); 2267 4007 mmusante 2268 4007 mmusante if (err == 0) { 2269 4007 mmusante err = dsl_sync_task_group_wait(ra->dstg); 2270 4007 mmusante } 2271 4007 mmusante 2272 4007 mmusante for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2273 4007 mmusante dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2274 4007 mmusante dsl_dataset_t *ds = dst->dst_arg1; 2275 4007 mmusante if (dst->dst_err) { 2276 4007 mmusante dsl_dir_name(ds->ds_dir, ra->failed); 2277 4009 mmusante (void) strcat(ra->failed, "@"); 2278 4009 mmusante (void) strcat(ra->failed, ra->newsnap); 2279 4007 mmusante } 2280 6689 maybee dsl_dataset_rele(ds, ra->dstg); 2281 4007 mmusante } 2282 4007 mmusante 2283 4543 marks if (err) 2284 4543 marks (void) strcpy(oldname, ra->failed); 2285 4007 mmusante 2286 4007 mmusante dsl_sync_task_group_destroy(ra->dstg); 2287 5326 ek110237 kmem_free(ra, sizeof (struct renamesnaparg)); 2288 4007 mmusante spa_close(spa, FTAG); 2289 4007 mmusante return (err); 2290 4007 mmusante } 2291 4007 mmusante 2292 4569 mmusante static int 2293 4569 mmusante dsl_valid_rename(char *oldname, void *arg) 2294 4569 mmusante { 2295 4569 mmusante int delta = *(int *)arg; 2296 4569 mmusante 2297 4569 mmusante if (strlen(oldname) + delta >= MAXNAMELEN) 2298 4569 mmusante return (ENAMETOOLONG); 2299 4569 mmusante 2300 4569 mmusante return (0); 2301 4569 mmusante } 2302 4569 mmusante 2303 789 ahrens #pragma weak dmu_objset_rename = dsl_dataset_rename 2304 789 ahrens int 2305 6689 maybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2306 789 ahrens { 2307 789 ahrens dsl_dir_t *dd; 
2308 2199 ahrens dsl_dataset_t *ds; 2309 789 ahrens const char *tail; 2310 789 ahrens int err; 2311 789 ahrens 2312 2199 ahrens err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2313 1544 eschrock if (err) 2314 1544 eschrock return (err); 2315 8517 Eric /* 2316 8517 Eric * If there are more than 2 references there may be holds 2317 8517 Eric * hanging around that haven't been cleared out yet. 2318 8517 Eric */ 2319 8517 Eric if (dmu_buf_refcount(dd->dd_dbuf) > 2) 2320 8517 Eric txg_wait_synced(dd->dd_pool, 0); 2321 789 ahrens if (tail == NULL) { 2322 4569 mmusante int delta = strlen(newname) - strlen(oldname); 2323 4569 mmusante 2324 7046 ahrens /* if we're growing, validate child name lengths */ 2325 4569 mmusante if (delta > 0) 2326 4569 mmusante err = dmu_objset_find(oldname, dsl_valid_rename, 2327 4569 mmusante &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2328 4569 mmusante 2329 4569 mmusante if (!err) 2330 4569 mmusante err = dsl_dir_rename(dd, newname); 2331 789 ahrens dsl_dir_close(dd, FTAG); 2332 789 ahrens return (err); 2333 789 ahrens } 2334 789 ahrens if (tail[0] != '@') { 2335 10588 Eric /* the name ended in a nonexistent component */ 2336 789 ahrens dsl_dir_close(dd, FTAG); 2337 789 ahrens return (ENOENT); 2338 789 ahrens } 2339 789 ahrens 2340 2199 ahrens dsl_dir_close(dd, FTAG); 2341 789 ahrens 2342 2199 ahrens /* new name must be snapshot in same filesystem */ 2343 2199 ahrens tail = strchr(newname, '@'); 2344 2199 ahrens if (tail == NULL) 2345 2199 ahrens return (EINVAL); 2346 2199 ahrens tail++; 2347 2199 ahrens if (strncmp(oldname, newname, tail - newname) != 0) 2348 2199 ahrens return (EXDEV); 2349 2199 ahrens 2350 4007 mmusante if (recursive) { 2351 4007 mmusante err = dsl_recursive_rename(oldname, newname); 2352 4007 mmusante } else { 2353 6689 maybee err = dsl_dataset_hold(oldname, FTAG, &ds); 2354 4007 mmusante if (err) 2355 4007 mmusante return (err); 2356 2199 ahrens 2357 4007 mmusante err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2358 4007 
mmusante dsl_dataset_snapshot_rename_check, 2359 4007 mmusante dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2360 2199 ahrens 2361 6689 maybee dsl_dataset_rele(ds, FTAG); 2362 4007 mmusante } 2363 2199 ahrens 2364 789 ahrens return (err); 2365 789 ahrens } 2366 2082 eschrock 2367 7046 ahrens /* One element of a snapshot list: the list linkage plus the dataset the element refers to. */ struct promotenode { 2368 6689 maybee list_node_t link; 2369 6689 maybee dsl_dataset_t *ds; 2370 6689 maybee }; 2371 6689 maybee 2372 2199 ahrens /* Argument block shared by dsl_dataset_promote_check() and dsl_dataset_promote_sync(). The check computes used, comp, uncomp, unique, cloneusedsnap and originusedsnap, and sets err_ds to the name of the shared snapshot that failed its walk; the sync function consumes the space figures. */ struct promotearg { 2373 7390 Matthew list_t shared_snaps, origin_snaps, clone_snaps; 2374 7390 Matthew dsl_dataset_t *origin_origin, *origin_head; 2375 7390 Matthew uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2376 10588 Eric char *err_ds; 2377 2199 ahrens }; 2378 7390 Matthew 2379 7390 Matthew static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2380 2199 ahrens 2381 4543 marks /* Check half of the promote sync task. arg1 is the head dataset of the clone being promoted and arg2 the struct promotearg. Returns EINVAL if that dataset is not a clone and 0 immediately when tx is not syncing; in syncing context it returns EXDEV if DS_FLAG_NOPROMOTE is set, and otherwise goes on to compute the space figures stored in pa. */ /* ARGSUSED */ 2382 2082 eschrock static int 2383 2199 ahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2384 2082 eschrock { 2385 2199 ahrens dsl_dataset_t *hds = arg1; 2386 2199 ahrens struct promotearg *pa = arg2; 2387 7390 Matthew /* the head of shared_snaps is used as the origin snapshot */ struct promotenode *snap = list_head(&pa->shared_snaps); 2388 6689 maybee dsl_dataset_t *origin_ds = snap->ds; 2389 6689 maybee int err; 2390 2082 eschrock 2391 7046 ahrens /* Check that it is a real clone */ 2392 7046 ahrens if (!dsl_dir_is_clone(hds->ds_dir)) 2393 2082 eschrock return (EINVAL); 2394 2082 eschrock 2395 2199 ahrens /* Since this is so expensive, don't do the preliminary check */ 2396 2199 ahrens if (!dmu_tx_is_syncing(tx)) 2397 2199 ahrens return (0); 2398 2199 ahrens 2399 6689 maybee if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2400 6689 maybee return (EXDEV); 2401 2082 eschrock 2402 5367 ahrens /* compute origin's new unique space */ 2403 7390 Matthew snap = list_tail(&pa->clone_snaps); 2404 7390 Matthew ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2405 7390 Matthew err = 
bplist_space_birthrange(&snap->ds->ds_deadlist, 2406 7390 Matthew origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2407 7390 Matthew if (err) 2408 6689 maybee return (err); 2409 6689 maybee 2410 6689 maybee /* 2411 6689 maybee * Walk the snapshots that we are moving 2412 6689 maybee * 2413 7390 Matthew * Compute space to transfer. Consider the incremental changes 2414 7390 Matthew * to used for each snapshot: 2415 7390 Matthew * (my used) = (prev's used) + (blocks born) - (blocks killed) 2416 7390 Matthew * So each snapshot gave birth to: 2417 7390 Matthew * (blocks born) = (my used) - (prev's used) + (blocks killed) 2418 6689 maybee * So a sequence would look like: 2419 7390 Matthew * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2420 6689 maybee * Which simplifies to: 2421 7390 Matthew * uN + kN + kN-1 + ... + k1 + k0 2422 6689 maybee * Note however, if we stop before we reach the ORIGIN we get: 2423 7390 Matthew * uN + kN + kN-1 + ... + kM - uM-1 2424 6689 maybee */ 2425 6689 maybee /* seed the totals with the figures of the origin snapshot (the uN term); the loop below adds the deadlist space of each shared snapshot (the k terms) */ pa->used = origin_ds->ds_phys->ds_used_bytes; 2426 6689 maybee pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2427 6689 maybee pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2428 7390 Matthew for (snap = list_head(&pa->shared_snaps); snap; 2429 7390 Matthew snap = list_next(&pa->shared_snaps, snap)) { 2430 2082 eschrock uint64_t val, dlused, dlcomp, dluncomp; 2431 6689 maybee dsl_dataset_t *ds = snap->ds; 2432 2082 eschrock 2433 2082 eschrock /* Check that the snapshot name does not conflict */ 2434 7390 Matthew VERIFY(0 == dsl_dataset_get_snapname(ds)); 2435 6689 maybee err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2436 10588 Eric /* a successful lookup means the clone already has a snapshot by this name */ if (err == 0) { 2437 10588 Eric err = EEXIST; 2438 10588 Eric goto out; 2439 10588 Eric } 2440 6689 maybee /* ENOENT is the expected answer; any other error fails the check */ if (err != ENOENT) 2441 10588 Eric goto out; 2442 6689 maybee 2443 6689 maybee /* The very first snapshot does not have a deadlist */ 2444 7390 Matthew if (ds->ds_phys->ds_prev_snap_obj == 0) 2445 
7390 Matthew continue; 2446 7390 Matthew 2447 7390 Matthew /* the assignment inside the condition is intentional: a nonzero error ends the walk */ if (err = bplist_space(&ds->ds_deadlist, 2448 7390 Matthew &dlused, &dlcomp, &dluncomp)) 2449 10588 Eric goto out; 2450 7390 Matthew pa->used += dlused; 2451 7390 Matthew pa->comp += dlcomp; 2452 7390 Matthew pa->uncomp += dluncomp; 2453 7390 Matthew } 2454 2082 eschrock 2455 6689 maybee /* 2456 6689 maybee * If we are a clone of a clone then we never reached ORIGIN, 2457 6689 maybee * so we need to subtract out the clone origin's used space. 2458 6689 maybee */ 2459 7390 Matthew if (pa->origin_origin) { 2460 7390 Matthew pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2461 7390 Matthew pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2462 7390 Matthew pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2463 2082 eschrock } 2464 2082 eschrock 2465 7390 Matthew /* Check that there is enough space here */ 2466 7390 Matthew err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2467 7390 Matthew pa->used); 2468 7390 Matthew if (err) 2469 7390 Matthew return (err); 2470 6689 maybee 2471 7390 Matthew /* 2472 7390 Matthew * Compute the amounts of space that will be used by snapshots 2473 7390 Matthew * after the promotion (for both origin and clone). For each, 2474 7390 Matthew * it is the amount of space that will be on all of their 2475 7390 Matthew * deadlists (that was not born before their new origin). 2476 7390 Matthew */ 2477 7390 Matthew if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2478 7390 Matthew uint64_t space; 2479 7390 Matthew 2480 7390 Matthew /* 2481 7390 Matthew * Note, typically this will not be a clone of a clone, 2482 7390 Matthew * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2483 7390 Matthew * these snaplist_space() -> bplist_space_birthrange() 2484 7390 Matthew * calls will be fast because they do not have to 2485 7390 Matthew * iterate over all bps. 
2486 7390 Matthew */ 2487 7390 Matthew snap = list_head(&pa->origin_snaps); 2488 7390 Matthew err = snaplist_space(&pa->shared_snaps, 2489 7390 Matthew snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2490 7390 Matthew if (err) 2491 7390 Matthew return (err); 2492 7390 Matthew 2493 7390 Matthew err = snaplist_space(&pa->clone_snaps, 2494 7390 Matthew snap->ds->ds_origin_txg, &space); 2495 7390 Matthew if (err) 2496 7390 Matthew return (err); 2497 7390 Matthew pa->cloneusedsnap += space; 2498 7390 Matthew } 2499 7390 Matthew if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2500 7390 Matthew err = snaplist_space(&pa->origin_snaps, 2501 7390 Matthew origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2502 7390 Matthew if (err) 2503 7390 Matthew return (err); 2504 6689 maybee } 2505 2082 eschrock 2506 7390 Matthew return (0); 2507 10588 Eric /* reached only by goto from inside the shared_snaps walk, so snap is the snapshot that failed; its name is reported through pa->err_ds */ out: 2508 10588 Eric pa->err_ds = snap->ds->ds_snapname; 2509 10588 Eric return (err); 2510 2199 ahrens } 2511 2199 ahrens 2512 2199 ahrens /* Sync half of the promote sync task. arg1 is the head dataset of the clone, arg2 the struct promotearg filled in by dsl_dataset_promote_check(), and cr is passed to the history log. It relinks the origin snapshot and the two dsl_dirs, moves every snapshot on shared_snaps into the dsl_dir of the clone, and adjusts the space accounting of both dirs. */ static void 2513 4543 marks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2514 2199 ahrens { 2515 2199 ahrens dsl_dataset_t *hds = arg1; 2516 2199 ahrens struct promotearg *pa = arg2; 2517 7390 Matthew struct promotenode *snap = list_head(&pa->shared_snaps); 2518 6689 maybee dsl_dataset_t *origin_ds = snap->ds; 2519 7390 Matthew dsl_dataset_t *origin_head; 2520 2199 ahrens dsl_dir_t *dd = hds->ds_dir; 2521 2199 ahrens dsl_pool_t *dp = hds->ds_dir->dd_pool; 2522 5367 ahrens dsl_dir_t *odd = NULL; 2523 7046 ahrens uint64_t oldnext_obj; 2524 7390 Matthew int64_t delta; 2525 2199 ahrens 2526 2199 ahrens ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2527 7390 Matthew 2528 7390 Matthew /* the head of origin_snaps is taken as the head dataset of the origin */ snap = list_head(&pa->origin_snaps); 2529 7390 Matthew origin_head = snap->ds; 2530 2199 ahrens 2531 2417 ahrens /* 2532 5367 ahrens * We need to explicitly open odd, since origin_ds's dd will be 2533 2417 ahrens * changing. 
2534 2417 ahrens */ 2535 5367 ahrens VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2536 5367 ahrens NULL, FTAG, &odd)); 2537 2082 eschrock 2538 6689 maybee /* change origin's next snap */ 2539 6689 maybee dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2540 7046 ahrens oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2541 7390 Matthew snap = list_tail(&pa->clone_snaps); 2542 7390 Matthew ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2543 7390 Matthew origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2544 7046 ahrens 2545 7046 ahrens /* change the origin's next clone */ 2546 7046 ahrens if (origin_ds->ds_phys->ds_next_clones_obj) { 2547 10801 Matthew /* the clone-side dataset that just became the next snap is dropped from next_clones, and the former next snap is added in its place */ remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); 2548 7046 ahrens VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2549 7046 ahrens origin_ds->ds_phys->ds_next_clones_obj, 2550 7046 ahrens oldnext_obj, tx)); 2551 7046 ahrens } 2552 6689 maybee 2553 6689 maybee /* change origin */ 2554 6689 maybee dmu_buf_will_dirty(dd->dd_dbuf, tx); 2555 6689 maybee ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2556 6689 maybee /* the clone dir takes over the origin that the old origin dir had ... */ dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2557 7390 Matthew hds->ds_origin_txg = origin_head->ds_origin_txg; 2558 6689 maybee dmu_buf_will_dirty(odd->dd_dbuf, tx); 2559 6689 maybee /* ... and the old origin dir now originates from origin_ds */ odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2560 7390 Matthew origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2561 6689 maybee 2562 2082 eschrock /* move snapshots to this dir */ 2563 7390 Matthew for (snap = list_head(&pa->shared_snaps); snap; 2564 7390 Matthew snap = list_next(&pa->shared_snaps, snap)) { 2565 6689 maybee dsl_dataset_t *ds = snap->ds; 2566 2082 eschrock 2567 7237 ek110237 /* unregister props as dsl_dir is changing */ 2568 10298 Matthew if (ds->ds_objset) { 2569 10298 Matthew dmu_objset_evict(ds->ds_objset); 2570 10298 Matthew ds->ds_objset = NULL; 2571 7237 ek110237 } 2572 2082 eschrock /* move 
snap name entry */ 2573 7390 Matthew VERIFY(0 == dsl_dataset_get_snapname(ds)); 2574 7390 Matthew VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2575 6689 maybee ds->ds_snapname, tx)); 2576 2199 ahrens VERIFY(0 == zap_add(dp->dp_meta_objset, 2577 2082 eschrock hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2578 2082 eschrock 8, 1, &ds->ds_object, tx)); 2579 2082 eschrock /* change containing dsl_dir */ 2580 2082 eschrock dmu_buf_will_dirty(ds->ds_dbuf, tx); 2581 5367 ahrens ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2582 2082 eschrock ds->ds_phys->ds_dir_obj = dd->dd_object; 2583 5367 ahrens ASSERT3P(ds->ds_dir, ==, odd); 2584 2082 eschrock /* swap the in-core dsl_dir reference from odd to dd, using ds itself as the tag for both the close and the reopen */ dsl_dir_close(ds->ds_dir, ds); 2585 2199 ahrens VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2586 2082 eschrock NULL, ds, &ds->ds_dir)); 2587 2082 eschrock 2588 2082 eschrock ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2589 7390 Matthew } 2590 2082 eschrock 2591 7390 Matthew /* 2592 7390 Matthew * Change space accounting. 2593 7390 Matthew * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2594 7390 Matthew * both be valid, or both be 0 (resulting in delta == 0). This 2595 7390 Matthew * is true for each of {clone,origin} independently. 
2596 7390 Matthew */ 2597 7390 Matthew 2598 7390 Matthew /* clone side: DD_USED_SNAP grows by delta and the rest of pa->used is charged to DD_USED_HEAD */ delta = pa->cloneusedsnap - 2599 7390 Matthew dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2600 7390 Matthew ASSERT3S(delta, >=, 0); 2601 7390 Matthew ASSERT3U(pa->used, >=, delta); 2602 7390 Matthew dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2603 7390 Matthew dsl_dir_diduse_space(dd, DD_USED_HEAD, 2604 7390 Matthew pa->used - delta, pa->comp, pa->uncomp, tx); 2605 7390 Matthew 2606 7390 Matthew /* origin side: the same pa->used in total is taken away, split between DD_USED_SNAP (delta, asserted non-positive) and DD_USED_HEAD */ delta = pa->originusedsnap - 2607 7390 Matthew odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2608 7390 Matthew ASSERT3S(delta, <=, 0); 2609 7390 Matthew ASSERT3U(pa->used, >=, -delta); 2610 7390 Matthew dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2611 7390 Matthew dsl_dir_diduse_space(odd, DD_USED_HEAD, 2612 7390 Matthew -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2613 7390 Matthew 2614 5367 ahrens origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2615 2082 eschrock 2616 4543 marks /* log history record */ 2617 4543 marks spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2618 6689 maybee cr, "dataset = %llu", hds->ds_object); 2619 4543 marks 2620 5367 ahrens dsl_dir_close(odd, FTAG); 2621 2082 eschrock } 2622 2082 eschrock 2623 7390 Matthew /* tag passed to the own and hold calls in snaplist_make() and to the matching disown and rele calls in snaplist_destroy() */ static char *snaplist_tag = "snaplist"; 2624 7390 Matthew /* 2625 7390 Matthew * Make a list of dsl_dataset_t's for the snapshots between first_obj 2626 7390 Matthew * (exclusive) and last_obj (inclusive). The list will be in reverse 2627 7390 Matthew * order (last_obj will be the list_head()). If first_obj == 0, do all 2628 7390 Matthew * snapshots back to this dataset's origin. 
2629 7390 Matthew */ 2630 7390 Matthew static int 2631 7390 Matthew snaplist_make(dsl_pool_t *dp, boolean_t own, 2632 7390 Matthew uint64_t first_obj, uint64_t last_obj, list_t *l) 2633 7390 Matthew { 2634 7390 Matthew uint64_t obj = last_obj; 2635 7390 Matthew 2636 7390 Matthew /* the caller is expected to hold dp_config_rwlock */ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2637 7390 Matthew 2638 7390 Matthew list_create(l, sizeof (struct promotenode), 2639 7390 Matthew offsetof(struct promotenode, link)); 2640 7390 Matthew 2641 7390 Matthew while (obj != first_obj) { 2642 7390 Matthew dsl_dataset_t *ds; 2643 7390 Matthew struct promotenode *snap; 2644 7390 Matthew int err; 2645 7390 Matthew 2646 7390 Matthew /* own selects ownership (then made exclusive) versus a plain hold; both use snaplist_tag */ if (own) { 2647 7390 Matthew err = dsl_dataset_own_obj(dp, obj, 2648 7390 Matthew 0, snaplist_tag, &ds); 2649 7390 Matthew if (err == 0) 2650 7390 Matthew dsl_dataset_make_exclusive(ds, snaplist_tag); 2651 7390 Matthew } else { 2652 7390 Matthew err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2653 7390 Matthew } 2654 7390 Matthew if (err == ENOENT) { 2655 7390 Matthew /* lost race with snapshot destroy */ 2656 7390 Matthew /* retry with the prev pointer re-read from the last dataset appended. NOTE(review): last is dereferenced unchecked, so this assumes at least one entry is already on the list -- confirm */ struct promotenode *last = list_tail(l); 2657 7390 Matthew ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2658 7390 Matthew obj = last->ds->ds_phys->ds_prev_snap_obj; 2659 7390 Matthew continue; 2660 7390 Matthew } else if (err) { 2661 7390 Matthew /* entries already appended stay on l; presumably the caller releases them with snaplist_destroy() -- TODO confirm */ return (err); 2662 7390 Matthew } 2663 7390 Matthew 2664 7390 Matthew /* a zero first_obj is replaced by the origin object of the dataset just opened, which then serves as the stopping point */ if (first_obj == 0) 2665 7390 Matthew first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2666 7390 Matthew 2667 7390 Matthew snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2668 7390 Matthew snap->ds = ds; 2669 7390 Matthew list_insert_tail(l, snap); 2670 7390 Matthew obj = ds->ds_phys->ds_prev_snap_obj; 2671 7390 Matthew } 2672 7390 Matthew 2673 7390 Matthew return (0); 2674 7390 Matthew } 2675 7390 Matthew 2676 7390 Matthew /* Add up, over every snapshot on l, the space that bplist_space_birthrange() reports for its deadlist with mintxg and UINT64_MAX as the birth-range bounds, and store the total in *spacep. Returns 0, or the first nonzero error from bplist_space_birthrange(). */ static int 2677 7390 Matthew snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2678 7390 Matthew { 2679 7390 Matthew struct 
promotenode *snap; 2680 7390 Matthew 2681 7390 Matthew /* the total starts at zero; after an early error return it holds only a partial sum */ *spacep = 0; 2682 7390 Matthew for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2683 7390 Matthew uint64_t used; 2684 7390 Matthew int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2685 7390 Matthew mintxg, UINT64_MAX, &used); 2686 7390 Matthew if (err) 2687 7390 Matthew return (err); 2688 7390 Matthew *spacep += used; 2689 7390 Matthew } 2690 7390 Matthew return (0); 2691 7390 Matthew } 2692 7390 Matthew 2693 7390 Matthew /* Tear down a list built by snaplist_make(): remove each node starting from the tail, disown (own set) or release (own clear) its dataset with snaplist_tag, free the node, then destroy the list. */ static void 2694 7390 Matthew snaplist_destroy(list_t *l, boolean_t own) 2695 7390 Matthew { 2696 7390 Matthew struct promotenode *snap; 2697 7390 Matthew 2698 8779 Mark /* nothing to do for a NULL list or one whose list_head link is inactive (presumably a list that list_create() never set up) */ if (!l || !list_link_active(&l->list_head)) 2699 7390 Matthew return; 2700 7390 Matthew 2701 7390 Matthew while ((snap = list_tail(l)) != NULL) { 2702 7390 Matthew list_remove(l, snap); 2703 7390 Matthew if (own) 2704 7390 Matthew dsl_dataset_disown(snap->ds, snaplist_tag); 2705 7390 Matthew else 2706 7390 Matthew dsl_dataset_rele(snap->ds, snaplist_tag); 2707 7390 Matthew kmem_free(snap, sizeof (struct promotenode)); 2708 7390 Matthew } 2709 7390 Matthew list_destroy(l); 2710 7390 Matthew } 2711 7390 Matthew 2712 7390 Matthew /* 2713 7390 Matthew * Promote a clone. Nomenclature note: 2714 7390 Matthew * "clone" or "cds": the original clone which is being promoted 2715 7390 Matthew * "origin" or "ods": the snapshot which is originally clone's origin 2716 7390 Matthew * "origin head" or "ohds": the dataset which is the head 2717 7390 Matthew * (filesystem/volume) for the origin 2718 7390 Matthew * "origin origin": the origin of the origin's filesystem (typically 2719 7390 Matthew * NULL, indicating that the clone is not a clone of a clone). 
2720 7390 Matthew */ 2721 2082 eschrock int 2722 10588 Eric dsl_dataset_promote(const char *name, char *conflsnap) 2723 2082 eschrock { 2724 2082 eschrock dsl_dataset_t *ds; 2725 6689 maybee dsl_dir_t *dd; 2726 6689 maybee dsl_pool_t *dp; 2727 2082 eschrock dmu_object_info_t doi; 2728 7390 Matthew struct promotearg pa = { 0 }; 2729 7046 ahrens struct promotenode *snap; 2730 6689 maybee int err; 2731 2082 eschrock 2732 6689 maybee err = dsl_dataset_hold(name, FTAG, &ds); 2733 2082 eschrock if (err) 2734 2082 eschrock return (err); 2735 6689 maybee dd = ds->ds_dir; 2736 6689 maybee dp = dd->dd_pool; 2737 2082 eschrock 2738 6689 maybee err = dmu_object_info(dp->dp_meta_objset, 2739 2082 eschrock ds->ds_phys->ds_snapnames_zapobj, &doi); 2740 2082 eschrock if (err) { 2741 6689 maybee dsl_dataset_rele(ds, FTAG); 2742 2082 eschrock return (err); 2743 2082 eschrock } 2744 6689 maybee 2745 7390 Matthew if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2746 7390 Matthew dsl_dataset_rele(ds, FTAG); 2747 7390 Matthew return (EINVAL); 2748 7390 Matthew } 2749 7390 Matthew 2750 6689 maybee /* 2751 6689 maybee * We are going to inherit all the snapshots taken before our 2752 6689 maybee * origin (i.e., our new origin will be our parent's origin). 2753 6689 maybee * Take ownership of them so that we can rename them into our 2754 6689 maybee * namespace. 
2755 6689 maybee */ 2756 6689 maybee rw_enter(&dp->dp_config_rwlock, RW_READER); 2757 7046 ahrens 2758 7390 Matthew err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2759 7390 Matthew &pa.shared_snaps); 2760 7390 Matthew if (err != 0) 2761 7390 Matthew goto out; 2762 7046 ahrens 2763 7390 Matthew err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2764 7390 Matthew if (err != 0) 2765 7390 Matthew goto out; 2766 7390 Matthew 2767 7390 Matthew snap = list_head(&pa.shared_snaps); 2768 7390 Matthew ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2769 7390 Matthew err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2770 7390 Matthew snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2771 7390 Matthew if (err != 0) 2772 7390 Matthew goto out; 2773 7390 Matthew 2774 7390 Matthew if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2775 7390 Matthew err = dsl_dataset_own_obj(dp, 2776 7390 Matthew snap->ds->ds_dir->dd_phys->dd_origin_obj, 2777 7390 Matthew 0, FTAG, &pa.origin_origin); 2778 7390 Matthew if (err != 0) 2779 6689 maybee goto out; 2780 7390 Matthew } 2781 7046 ahrens 2782 7390 Matthew out: 2783 6689 maybee rw_exit(&dp->dp_config_rwlock); 2784 2082 eschrock 2785 2082 eschrock /* 2786 2082 eschrock * Add in 128x the snapnames zapobj size, since we will be moving 2787 2082 eschrock * a bunch of snapnames to the promoted ds, and dirtying their 2788 2082 eschrock * bonus buffers. 2789 2082 eschrock */ 2790 7390 Matthew if (err == 0) { 2791 7390 Matthew err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2792 7390 Matthew dsl_dataset_promote_sync, ds, &pa, 2793 10922 Jeff 2 + 2 * doi.doi_physical_blocks_512); 2794 10588 Eric if (err && pa.err_ds && conflsnap) 2795