Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/bplist.h>
     27 #include <sys/zfs_context.h>
     28 
     29 void
     30 bplist_init(bplist_t *bpl)
     31 {
     32 	bzero(bpl, sizeof (*bpl));
     33 	mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
     34 }
     35 
     36 void
     37 bplist_fini(bplist_t *bpl)
     38 {
     39 	ASSERT(bpl->bpl_queue == NULL);
     40 	mutex_destroy(&bpl->bpl_lock);
     41 }
     42 
     43 static int
     44 bplist_hold(bplist_t *bpl)
     45 {
     46 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
     47 	if (bpl->bpl_dbuf == NULL) {
     48 		int err = dmu_bonus_hold(bpl->bpl_mos,
     49 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
     50 		if (err)
     51 			return (err);
     52 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
     53 	}
     54 	return (0);
     55 }
     56 
     57 uint64_t
     58 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
     59 {
     60 	int size;
     61 
     62 	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
     63 	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
     64 
     65 	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
     66 	    DMU_OT_BPLIST_HDR, size, tx));
     67 }
     68 
     69 void
     70 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
     71 {
     72 	VERIFY(dmu_object_free(mos, object, tx) == 0);
     73 }
     74 
     75 int
     76 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
     77 {
     78 	dmu_object_info_t doi;
     79 	int err;
     80 
     81 	err = dmu_object_info(mos, object, &doi);
     82 	if (err)
     83 		return (err);
     84 
     85 	mutex_enter(&bpl->bpl_lock);
     86 
     87 	ASSERT(bpl->bpl_dbuf == NULL);
     88 	ASSERT(bpl->bpl_phys == NULL);
     89 	ASSERT(bpl->bpl_cached_dbuf == NULL);
     90 	ASSERT(bpl->bpl_queue == NULL);
     91 	ASSERT(object != 0);
     92 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
     93 	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
     94 
     95 	bpl->bpl_mos = mos;
     96 	bpl->bpl_object = object;
     97 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
     98 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
     99 	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
    100 
    101 	mutex_exit(&bpl->bpl_lock);
    102 	return (0);
    103 }
    104 
    105 void
    106 bplist_close(bplist_t *bpl)
    107 {
    108 	mutex_enter(&bpl->bpl_lock);
    109 
    110 	ASSERT(bpl->bpl_queue == NULL);
    111 
    112 	if (bpl->bpl_cached_dbuf) {
    113 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    114 		bpl->bpl_cached_dbuf = NULL;
    115 	}
    116 	if (bpl->bpl_dbuf) {
    117 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
    118 		bpl->bpl_dbuf = NULL;
    119 		bpl->bpl_phys = NULL;
    120 	}
    121 
    122 	mutex_exit(&bpl->bpl_lock);
    123 }
    124 
    125 boolean_t
    126 bplist_empty(bplist_t *bpl)
    127 {
    128 	boolean_t rv;
    129 
    130 	if (bpl->bpl_object == 0)
    131 		return (B_TRUE);
    132 
    133 	mutex_enter(&bpl->bpl_lock);
    134 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
    135 	rv = (bpl->bpl_phys->bpl_entries == 0);
    136 	mutex_exit(&bpl->bpl_lock);
    137 
    138 	return (rv);
    139 }
    140 
    141 static int
    142 bplist_cache(bplist_t *bpl, uint64_t blkid)
    143 {
    144 	int err = 0;
    145 
    146 	if (bpl->bpl_cached_dbuf == NULL ||
    147 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
    148 		if (bpl->bpl_cached_dbuf != NULL)
    149 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    150 		err = dmu_buf_hold(bpl->bpl_mos,
    151 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
    152 		    bpl, &bpl->bpl_cached_dbuf);
    153 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
    154 		    1ULL << bpl->bpl_blockshift);
    155 	}
    156 	return (err);
    157 }
    158 
    159 int
    160 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
    161 {
    162 	uint64_t blk, off;
    163 	blkptr_t *bparray;
    164 	int err;
    165 
    166 	mutex_enter(&bpl->bpl_lock);
    167 
    168 	err = bplist_hold(bpl);
    169 	if (err) {
    170 		mutex_exit(&bpl->bpl_lock);
    171 		return (err);
    172 	}
    173 
    174 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
    175 		mutex_exit(&bpl->bpl_lock);
    176 		return (ENOENT);
    177 	}
    178 
    179 	blk = *itorp >> bpl->bpl_bpshift;
    180 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
    181 
    182 	err = bplist_cache(bpl, blk);
    183 	if (err) {
    184 		mutex_exit(&bpl->bpl_lock);
    185 		return (err);
    186 	}
    187 
    188 	bparray = bpl->bpl_cached_dbuf->db_data;
    189 	*bp = bparray[off];
    190 	(*itorp)++;
    191 	mutex_exit(&bpl->bpl_lock);
    192 	return (0);
    193 }
    194 
    195 int
    196 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
    197 {
    198 	uint64_t blk, off;
    199 	blkptr_t *bparray;
    200 	int err;
    201 
    202 	ASSERT(!BP_IS_HOLE(bp));
    203 	mutex_enter(&bpl->bpl_lock);
    204 	err = bplist_hold(bpl);
    205 	if (err)
    206 		return (err);
    207 
    208 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
    209 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
    210 
    211 	err = bplist_cache(bpl, blk);
    212 	if (err) {
    213 		mutex_exit(&bpl->bpl_lock);
    214 		return (err);
    215 	}
    216 
    217 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
    218 	bparray = bpl->bpl_cached_dbuf->db_data;
    219 	bparray[off] = *bp;
    220 
    221 	/* We never need the fill count. */
    222 	bparray[off].blk_fill = 0;
    223 
    224 	/* The bplist will compress better if we can leave off the checksum */
    225 	if (!BP_GET_DEDUP(&bparray[off]))
    226 		bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
    227 
    228 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    229 	bpl->bpl_phys->bpl_entries++;
    230 	bpl->bpl_phys->bpl_bytes +=
    231 	    bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
    232 	if (bpl->bpl_havecomp) {
    233 		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
    234 		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
    235 	}
    236 	mutex_exit(&bpl->bpl_lock);
    237 
    238 	return (0);
    239 }
    240 
    241 void
    242 bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx)
    243 {
    244 	VERIFY(bplist_enqueue(bpl, bp, tx) == 0);
    245 }
    246 
    247 /*
    248  * Deferred entry; will be processed later by bplist_sync().
    249  */
    250 void
    251 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
    252 {
    253 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
    254 
    255 	ASSERT(!BP_IS_HOLE(bp));
    256 	mutex_enter(&bpl->bpl_lock);
    257 	bpq->bpq_blk = *bp;
    258 	bpq->bpq_next = bpl->bpl_queue;
    259 	bpl->bpl_queue = bpq;
    260 	mutex_exit(&bpl->bpl_lock);
    261 }
    262 
    263 void
    264 bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx)
    265 {
    266 	bplist_q_t *bpq;
    267 
    268 	mutex_enter(&bpl->bpl_lock);
    269 	while ((bpq = bpl->bpl_queue) != NULL) {
    270 		bpl->bpl_queue = bpq->bpq_next;
    271 		mutex_exit(&bpl->bpl_lock);
    272 		func(arg, &bpq->bpq_blk, tx);
    273 		kmem_free(bpq, sizeof (*bpq));
    274 		mutex_enter(&bpl->bpl_lock);
    275 	}
    276 	mutex_exit(&bpl->bpl_lock);
    277 }
    278 
    279 void
    280 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
    281 {
    282 	mutex_enter(&bpl->bpl_lock);
    283 	ASSERT3P(bpl->bpl_queue, ==, NULL);
    284 	VERIFY(0 == bplist_hold(bpl));
    285 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    286 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
    287 	    bpl->bpl_object, 0, -1ULL, tx));
    288 	bpl->bpl_phys->bpl_entries = 0;
    289 	bpl->bpl_phys->bpl_bytes = 0;
    290 	if (bpl->bpl_havecomp) {
    291 		bpl->bpl_phys->bpl_comp = 0;
    292 		bpl->bpl_phys->bpl_uncomp = 0;
    293 	}
    294 	mutex_exit(&bpl->bpl_lock);
    295 }
    296 
    297 int
    298 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
    299 {
    300 	int err;
    301 
    302 	mutex_enter(&bpl->bpl_lock);
    303 
    304 	err = bplist_hold(bpl);
    305 	if (err) {
    306 		mutex_exit(&bpl->bpl_lock);
    307 		return (err);
    308 	}
    309 
    310 	*usedp = bpl->bpl_phys->bpl_bytes;
    311 	if (bpl->bpl_havecomp) {
    312 		*compp = bpl->bpl_phys->bpl_comp;
    313 		*uncompp = bpl->bpl_phys->bpl_uncomp;
    314 	}
    315 	mutex_exit(&bpl->bpl_lock);
    316 
    317 	if (!bpl->bpl_havecomp) {
    318 		uint64_t itor = 0, comp = 0, uncomp = 0;
    319 		blkptr_t bp;
    320 
    321 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    322 			comp += BP_GET_PSIZE(&bp);
    323 			uncomp += BP_GET_UCSIZE(&bp);
    324 		}
    325 		if (err == ENOENT)
    326 			err = 0;
    327 		*compp = comp;
    328 		*uncompp = uncomp;
    329 	}
    330 
    331 	return (err);
    332 }
    333 
    334 /*
    335  * Return (in *dsizep) the amount of space on the deadlist which is:
    336  * mintxg < blk_birth <= maxtxg
    337  */
    338 int
    339 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    340     uint64_t *dsizep)
    341 {
    342 	uint64_t size = 0;
    343 	uint64_t itor = 0;
    344 	blkptr_t bp;
    345 	int err;
    346 
    347 	/*
    348 	 * As an optimization, if they want the whole txg range, just
    349 	 * get bpl_bytes rather than iterating over the bps.
    350 	 */
    351 	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
    352 		mutex_enter(&bpl->bpl_lock);
    353 		err = bplist_hold(bpl);
    354 		if (err == 0)
    355 			*dsizep = bpl->bpl_phys->bpl_bytes;
    356 		mutex_exit(&bpl->bpl_lock);
    357 		return (err);
    358 	}
    359 
    360 	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    361 		if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
    362 			size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp);
    363 		}
    364 	}
    365 	if (err == ENOENT)
    366 		err = 0;
    367 	*dsizep = size;
    368 	return (err);
    369 }
    370