1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 8746 Matthew * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 24 789 ahrens */ 25 789 ahrens 26 789 ahrens #include <sys/zfs_context.h> 27 789 ahrens #include <sys/dmu_objset.h> 28 789 ahrens #include <sys/dmu_traverse.h> 29 789 ahrens #include <sys/dsl_dataset.h> 30 789 ahrens #include <sys/dsl_dir.h> 31 789 ahrens #include <sys/dsl_pool.h> 32 789 ahrens #include <sys/dnode.h> 33 789 ahrens #include <sys/spa.h> 34 789 ahrens #include <sys/zio.h> 35 789 ahrens #include <sys/dmu_impl.h> 36 7837 Matthew #include <sys/callb.h> 37 789 ahrens 38 7837 Matthew struct prefetch_data { 39 7837 Matthew kmutex_t pd_mtx; 40 7837 Matthew kcondvar_t pd_cv; 41 7837 Matthew int pd_blks_max; 42 7837 Matthew int pd_blks_fetched; 43 7837 Matthew int pd_flags; 44 7837 Matthew boolean_t pd_cancel; 45 7837 Matthew boolean_t pd_exited; 46 7837 Matthew }; 47 789 ahrens 48 7837 Matthew struct traverse_data { 49 7837 Matthew spa_t *td_spa; 50 7837 Matthew uint64_t td_objset; 51 7837 Matthew blkptr_t *td_rootbp; 52 7837 Matthew uint64_t td_min_txg; 53 7837 Matthew int td_flags; 54 7837 Matthew struct prefetch_data *td_pfd; 55 7837 Matthew blkptr_cb_t *td_func; 56 7837 Matthew void *td_arg; 57 7837 Matthew }; 58 9396 Matthew 59 9396 Matthew static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp, 60 9396 Matthew arc_buf_t *buf, uint64_t objset, uint64_t object); 61 789 ahrens 62 1544 eschrock /* ARGSUSED */ 63 10922 Jeff static int 64 1601 bonwick traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) 65 1544 eschrock { 66 7837 Matthew struct traverse_data *td = arg; 67 7837 Matthew zbookmark_t zb; 68 1544 eschrock 69 7837 Matthew if (bp->blk_birth == 0) 70 10922 Jeff return (0); 71 1601 bonwick 72 7837 Matthew if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa)) 73 10922 Jeff return (0); 74 7837 Matthew 75 10922 Jeff SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, 76 10922 Jeff bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); 77 10922 Jeff 78 10922 Jeff (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg); 79 10922 Jeff 80 10922 Jeff return (0); 81 1544 eschrock } 82 1544 eschrock 83 1544 eschrock /* ARGSUSED */ 84 10922 Jeff static int 85 1601 bonwick traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) 86 1544 eschrock { 87 7837 Matthew struct traverse_data *td = arg; 88 1544 eschrock 89 1544 eschrock if (lrc->lrc_txtype == TX_WRITE) { 90 1544 eschrock lr_write_t *lr = (lr_write_t *)lrc; 91 1544 eschrock blkptr_t *bp = &lr->lr_blkptr; 92 7837 Matthew zbookmark_t zb; 93 1544 eschrock 94 7837 Matthew if (bp->blk_birth == 0) 95 10922 Jeff return (0); 96 1601 bonwick 97 7837 Matthew if (claim_txg == 0 || bp->blk_birth < claim_txg) 98 10922 Jeff return (0); 99 7837 Matthew 100 10922 Jeff SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ZB_ZIL_LEVEL, 101 10922 Jeff lr->lr_offset / BP_GET_LSIZE(bp)); 102 10922 Jeff 103 10922 Jeff (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, 104 10922 Jeff td->td_arg); 105 1544 eschrock } 106 10922 Jeff return (0); 107 1544 eschrock } 108 1544 eschrock 109 1544 eschrock static void 110 7837 Matthew traverse_zil(struct traverse_data *td, zil_header_t *zh) 111 1544 eschrock { 112 1601 bonwick uint64_t claim_txg = zh->zh_claim_txg; 113 1544 eschrock zilog_t *zilog; 114 1544 eschrock 115 1601 bonwick /* 116 1601 bonwick * We only want to visit blocks that have been claimed but not yet 117 10922 Jeff * replayed; plus, in read-only mode, blocks that are already stable. 118 1601 bonwick */ 119 8241 Jeff if (claim_txg == 0 && spa_writeable(td->td_spa)) 120 1601 bonwick return; 121 1601 bonwick 122 7837 Matthew zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); 123 1544 eschrock 124 7837 Matthew (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, 125 1601 bonwick claim_txg); 126 1544 eschrock 127 1544 eschrock zil_free(zilog); 128 1544 eschrock } 129 1544 eschrock 130 789 ahrens static int 131 7837 Matthew traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp, 132 7837 Matthew arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb) 133 789 ahrens { 134 8012 Eric zbookmark_t czb; 135 7837 Matthew int err = 0; 136 7837 Matthew arc_buf_t *buf = NULL; 137 7837 Matthew struct prefetch_data *pd = td->td_pfd; 138 789 ahrens 139 7837 Matthew if (bp->blk_birth == 0) { 140 10922 Jeff err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg); 141 7837 Matthew return (err); 142 789 ahrens } 143 789 ahrens 144 7837 Matthew if (bp->blk_birth <= td->td_min_txg) 145 7837 Matthew return (0); 146 789 ahrens 147 7837 Matthew if (pd && !pd->pd_exited && 148 7837 Matthew ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) || 149 7837 Matthew BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) { 150 7837 Matthew mutex_enter(&pd->pd_mtx); 151 7837 Matthew ASSERT(pd->pd_blks_fetched >= 0); 152 7837 Matthew while (pd->pd_blks_fetched == 0 && !pd->pd_exited) 153 7837 Matthew cv_wait(&pd->pd_cv, &pd->pd_mtx); 154 7837 Matthew pd->pd_blks_fetched--; 155 7837 Matthew cv_broadcast(&pd->pd_cv); 156 7837 Matthew mutex_exit(&pd->pd_mtx); 157 789 ahrens } 158 789 ahrens 159 7837 Matthew if (td->td_flags & TRAVERSE_PRE) { 160 10922 Jeff err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg); 161 7837 Matthew if (err) 162 7837 Matthew return (err); 163 789 ahrens } 164 789 ahrens 165 7837 Matthew if (BP_GET_LEVEL(bp) > 0) { 166 7837 Matthew uint32_t flags = ARC_WAIT; 167 7837 Matthew int i; 168 7837 Matthew blkptr_t *cbp; 169 7837 Matthew int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 170 789 ahrens 171 7837 Matthew err = arc_read(NULL, td->td_spa, bp, pbuf, 172 7837 Matthew arc_getbuf_func, &buf, 173 7837 Matthew ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 174 7837 Matthew if (err) 175 7837 Matthew return (err); 176 789 ahrens 177 7837 Matthew /* recursively visitbp() blocks below this */ 178 7837 Matthew cbp = buf->b_data; 179 7837 Matthew for (i = 0; i < epb; i++, cbp++) { 180 7837 Matthew SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 181 7837 Matthew zb->zb_level - 1, 182 7837 Matthew zb->zb_blkid * epb + i); 183 7837 Matthew err = traverse_visitbp(td, dnp, buf, cbp, &czb); 184 7837 Matthew if (err) 185 7837 Matthew break; 186 7837 Matthew } 187 7837 Matthew } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { 188 7837 Matthew uint32_t flags = ARC_WAIT; 189 9396 Matthew int i; 190 7837 Matthew int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; 191 7837 Matthew 192 7837 Matthew err = arc_read(NULL, td->td_spa, bp, pbuf, 193 7837 Matthew arc_getbuf_func, &buf, 194 7837 Matthew ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 195 7837 Matthew if (err) 196 7837 Matthew return (err); 197 7837 Matthew 198 7837 Matthew /* recursively visitbp() blocks below this */ 199 7837 Matthew dnp = buf->b_data; 200 7837 Matthew for (i = 0; i < epb && err == 0; i++, dnp++) { 201 9396 Matthew err = traverse_dnode(td, dnp, buf, zb->zb_objset, 202 9396 Matthew zb->zb_blkid * epb + i); 203 9396 Matthew if (err) 204 9396 Matthew break; 205 789 ahrens } 206 7837 Matthew } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { 207 7837 Matthew uint32_t flags = ARC_WAIT; 208 7837 Matthew objset_phys_t *osp; 209 9396 Matthew dnode_phys_t *dnp; 210 789 ahrens 211 7837 Matthew err = arc_read_nolock(NULL, td->td_spa, bp, 212 7837 Matthew arc_getbuf_func, &buf, 213 7837 Matthew ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 214 7837 Matthew if (err) 215 7837 Matthew return (err); 216 789 ahrens 217 7837 Matthew osp = buf->b_data; 218 7837 Matthew traverse_zil(td, &osp->os_zil_header); 219 789 ahrens 220 9396 Matthew dnp = &osp->os_meta_dnode; 221 10922 Jeff err = traverse_dnode(td, dnp, buf, zb->zb_objset, 222 10922 Jeff DMU_META_DNODE_OBJECT); 223 9396 Matthew if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { 224 9396 Matthew dnp = &osp->os_userused_dnode; 225 9396 Matthew err = traverse_dnode(td, dnp, buf, zb->zb_objset, 226 9396 Matthew DMU_USERUSED_OBJECT); 227 9396 Matthew } 228 9396 Matthew if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { 229 9396 Matthew dnp = &osp->os_groupused_dnode; 230 9396 Matthew err = traverse_dnode(td, dnp, buf, zb->zb_objset, 231 9396 Matthew DMU_GROUPUSED_OBJECT); 232 789 ahrens } 233 789 ahrens } 234 789 ahrens 235 7837 Matthew if (buf) 236 7837 Matthew (void) arc_buf_remove_ref(buf, &buf); 237 789 ahrens 238 7837 Matthew if (err == 0 && (td->td_flags & TRAVERSE_POST)) 239 10922 Jeff err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg); 240 7837 Matthew 241 9396 Matthew return (err); 242 9396 Matthew } 243 9396 Matthew 244 9396 Matthew static int 245 9396 Matthew traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp, 246 9396 Matthew arc_buf_t *buf, uint64_t objset, uint64_t object) 247 9396 Matthew { 248 9396 Matthew int j, err = 0; 249 9396 Matthew zbookmark_t czb; 250 9396 Matthew 251 9396 Matthew for (j = 0; j < dnp->dn_nblkptr; j++) { 252 9396 Matthew SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); 253 9396 Matthew err = traverse_visitbp(td, dnp, buf, 254 9396 Matthew (blkptr_t *)&dnp->dn_blkptr[j], &czb); 255 9396 Matthew if (err) 256 9396 Matthew break; 257 9396 Matthew } 258 7837 Matthew return (err); 259 7837 Matthew } 260 7837 Matthew 261 7837 Matthew /* ARGSUSED */ 262 7837 Matthew static int 263 10922 Jeff traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 264 10922 Jeff const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 265 7837 Matthew { 266 7837 Matthew struct prefetch_data *pfd = arg; 267 7837 Matthew uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 268 7837 Matthew 269 7837 Matthew ASSERT(pfd->pd_blks_fetched >= 0); 270 7837 Matthew if (pfd->pd_cancel) 271 7837 Matthew return (EINTR); 272 7837 Matthew 273 7837 Matthew if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) || 274 7837 Matthew BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) 275 7837 Matthew return (0); 276 7837 Matthew 277 7837 Matthew mutex_enter(&pfd->pd_mtx); 278 7837 Matthew while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max) 279 7837 Matthew cv_wait(&pfd->pd_cv, &pfd->pd_mtx); 280 7837 Matthew pfd->pd_blks_fetched++; 281 7837 Matthew cv_broadcast(&pfd->pd_cv); 282 7837 Matthew mutex_exit(&pfd->pd_mtx); 283 7837 Matthew 284 7837 Matthew (void) arc_read_nolock(NULL, spa, bp, NULL, NULL, 285 7837 Matthew ZIO_PRIORITY_ASYNC_READ, 286 7837 Matthew ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 287 7837 Matthew &aflags, zb); 288 7837 Matthew 289 7837 Matthew return (0); 290 7837 Matthew } 291 7837 Matthew 292 7837 Matthew static void 293 7837 Matthew traverse_prefetch_thread(void *arg) 294 7837 Matthew { 295 7837 Matthew struct traverse_data *td_main = arg; 296 7837 Matthew struct traverse_data td = *td_main; 297 7837 Matthew zbookmark_t czb; 298 7837 Matthew 299 7837 Matthew td.td_func = traverse_prefetcher; 300 7837 Matthew td.td_arg = td_main->td_pfd; 301 7837 Matthew td.td_pfd = NULL; 302 7837 Matthew 303 10922 Jeff SET_BOOKMARK(&czb, td.td_objset, 304 10922 Jeff ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 305 7837 Matthew (void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb); 306 7837 Matthew 307 7837 Matthew mutex_enter(&td_main->td_pfd->pd_mtx); 308 7837 Matthew td_main->td_pfd->pd_exited = B_TRUE; 309 7837 Matthew cv_broadcast(&td_main->td_pfd->pd_cv); 310 7837 Matthew mutex_exit(&td_main->td_pfd->pd_mtx); 311 789 ahrens } 312 789 ahrens 313 789 ahrens /* 314 7837 Matthew * NB: dataset must not be changing on-disk (eg, is a snapshot or we are 315 7837 Matthew * in syncing context). 316 789 ahrens */ 317 7837 Matthew static int 318 7837 Matthew traverse_impl(spa_t *spa, uint64_t objset, blkptr_t *rootbp, 319 7837 Matthew uint64_t txg_start, int flags, blkptr_cb_t func, void *arg) 320 789 ahrens { 321 7837 Matthew struct traverse_data td; 322 7837 Matthew struct prefetch_data pd = { 0 }; 323 7837 Matthew zbookmark_t czb; 324 789 ahrens int err; 325 789 ahrens 326 7837 Matthew td.td_spa = spa; 327 7837 Matthew td.td_objset = objset; 328 7837 Matthew td.td_rootbp = rootbp; 329 7837 Matthew td.td_min_txg = txg_start; 330 7837 Matthew td.td_func = func; 331 7837 Matthew td.td_arg = arg; 332 7837 Matthew td.td_pfd = &pd; 333 7837 Matthew td.td_flags = flags; 334 789 ahrens 335 7837 Matthew pd.pd_blks_max = 100; 336 7837 Matthew pd.pd_flags = flags; 337 7837 Matthew mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL); 338 7837 Matthew cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); 339 789 ahrens 340 7837 Matthew if (!(flags & TRAVERSE_PREFETCH) || 341 7837 Matthew 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread, 342 7837 Matthew &td, TQ_NOQUEUE)) 343 7837 Matthew pd.pd_exited = B_TRUE; 344 789 ahrens 345 10922 Jeff SET_BOOKMARK(&czb, objset, 346 10922 Jeff ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 347 7837 Matthew err = traverse_visitbp(&td, NULL, NULL, rootbp, &czb); 348 7837 Matthew 349 7837 Matthew mutex_enter(&pd.pd_mtx); 350 7837 Matthew pd.pd_cancel = B_TRUE; 351 7837 Matthew cv_broadcast(&pd.pd_cv); 352 7837 Matthew while (!pd.pd_exited) 353 7837 Matthew cv_wait(&pd.pd_cv, &pd.pd_mtx); 354 7837 Matthew mutex_exit(&pd.pd_mtx); 355 7837 Matthew 356 7837 Matthew mutex_destroy(&pd.pd_mtx); 357 7837 Matthew cv_destroy(&pd.pd_cv); 358 7837 Matthew 359 789 ahrens return (err); 360 789 ahrens } 361 789 ahrens 362 7837 Matthew /* 363 7837 Matthew * NB: dataset must not be changing on-disk (eg, is a snapshot or we are 364 7837 Matthew * in syncing context). 365 7837 Matthew */ 366 789 ahrens int 367 7837 Matthew traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, 368 7837 Matthew blkptr_cb_t func, void *arg) 369 6423 gw25295 { 370 7837 Matthew return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds->ds_object, 371 7837 Matthew &ds->ds_phys->ds_bp, txg_start, flags, func, arg)); 372 789 ahrens } 373 789 ahrens 374 789 ahrens /* 375 7837 Matthew * NB: pool must not be changing on-disk (eg, from zdb or sync context). 376 789 ahrens */ 377 7837 Matthew int 378 11125 Jeff traverse_pool(spa_t *spa, uint64_t txg_start, int flags, 379 11125 Jeff blkptr_cb_t func, void *arg) 380 789 ahrens { 381 7837 Matthew int err; 382 7837 Matthew uint64_t obj; 383 7837 Matthew dsl_pool_t *dp = spa_get_dsl(spa); 384 7837 Matthew objset_t *mos = dp->dp_meta_objset; 385 789 ahrens 386 7837 Matthew /* visit the MOS */ 387 7837 Matthew err = traverse_impl(spa, 0, spa_get_rootblkptr(spa), 388 11125 Jeff txg_start, flags, func, arg); 389 7837 Matthew if (err) 390 7837 Matthew return (err); 391 789 ahrens 392 7837 Matthew /* visit each dataset */ 393 10921 Tim for (obj = 1; err == 0; err = dmu_object_next(mos, &obj, FALSE, 394 10921 Tim txg_start)) { 395 7837 Matthew dmu_object_info_t doi; 396 789 ahrens 397 7837 Matthew err = dmu_object_info(mos, obj, &doi); 398 7837 Matthew if (err) 399 7837 Matthew return (err); 400 789 ahrens 401 7837 Matthew if (doi.doi_type == DMU_OT_DSL_DATASET) { 402 7837 Matthew dsl_dataset_t *ds; 403 10921 Tim uint64_t txg = txg_start; 404 10921 Tim 405 7837 Matthew rw_enter(&dp->dp_config_rwlock, RW_READER); 406 7837 Matthew err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 407 7837 Matthew rw_exit(&dp->dp_config_rwlock); 408 7837 Matthew if (err) 409 7837 Matthew return (err); 410 10921 Tim if (ds->ds_phys->ds_prev_snap_txg > txg) 411 10921 Tim txg = ds->ds_phys->ds_prev_snap_txg; 412 11125 Jeff err = traverse_dataset(ds, txg, flags, func, arg); 413 7837 Matthew dsl_dataset_rele(ds, FTAG); 414 7837 Matthew if (err) 415 7837 Matthew return (err); 416 789 ahrens } 417 789 ahrens } 418 7837 Matthew if (err == ESRCH) 419 7837 Matthew err = 0; 420 7837 Matthew return (err); 421 789 ahrens } 422