Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/bplist.h>
     27 #include <sys/zfs_context.h>
     28 
     29 static int
     30 bplist_hold(bplist_t *bpl)
     31 {
     32 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
     33 	if (bpl->bpl_dbuf == NULL) {
     34 		int err = dmu_bonus_hold(bpl->bpl_mos,
     35 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
     36 		if (err)
     37 			return (err);
     38 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
     39 	}
     40 	return (0);
     41 }
     42 
     43 uint64_t
     44 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
     45 {
     46 	int size;
     47 
     48 	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
     49 	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
     50 
     51 	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
     52 	    DMU_OT_BPLIST_HDR, size, tx));
     53 }
     54 
     55 void
     56 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
     57 {
     58 	VERIFY(dmu_object_free(mos, object, tx) == 0);
     59 }
     60 
     61 int
     62 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
     63 {
     64 	dmu_object_info_t doi;
     65 	int err;
     66 
     67 	err = dmu_object_info(mos, object, &doi);
     68 	if (err)
     69 		return (err);
     70 
     71 	mutex_enter(&bpl->bpl_lock);
     72 
     73 	ASSERT(bpl->bpl_dbuf == NULL);
     74 	ASSERT(bpl->bpl_phys == NULL);
     75 	ASSERT(bpl->bpl_cached_dbuf == NULL);
     76 	ASSERT(bpl->bpl_queue == NULL);
     77 	ASSERT(object != 0);
     78 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
     79 	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
     80 
     81 	bpl->bpl_mos = mos;
     82 	bpl->bpl_object = object;
     83 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
     84 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
     85 	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
     86 
     87 	mutex_exit(&bpl->bpl_lock);
     88 	return (0);
     89 }
     90 
     91 void
     92 bplist_close(bplist_t *bpl)
     93 {
     94 	mutex_enter(&bpl->bpl_lock);
     95 
     96 	ASSERT(bpl->bpl_queue == NULL);
     97 
     98 	if (bpl->bpl_cached_dbuf) {
     99 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    100 		bpl->bpl_cached_dbuf = NULL;
    101 	}
    102 	if (bpl->bpl_dbuf) {
    103 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
    104 		bpl->bpl_dbuf = NULL;
    105 		bpl->bpl_phys = NULL;
    106 	}
    107 
    108 	mutex_exit(&bpl->bpl_lock);
    109 }
    110 
    111 boolean_t
    112 bplist_empty(bplist_t *bpl)
    113 {
    114 	boolean_t rv;
    115 
    116 	if (bpl->bpl_object == 0)
    117 		return (B_TRUE);
    118 
    119 	mutex_enter(&bpl->bpl_lock);
    120 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
    121 	rv = (bpl->bpl_phys->bpl_entries == 0);
    122 	mutex_exit(&bpl->bpl_lock);
    123 
    124 	return (rv);
    125 }
    126 
    127 static int
    128 bplist_cache(bplist_t *bpl, uint64_t blkid)
    129 {
    130 	int err = 0;
    131 
    132 	if (bpl->bpl_cached_dbuf == NULL ||
    133 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
    134 		if (bpl->bpl_cached_dbuf != NULL)
    135 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    136 		err = dmu_buf_hold(bpl->bpl_mos,
    137 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
    138 		    bpl, &bpl->bpl_cached_dbuf);
    139 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
    140 		    1ULL << bpl->bpl_blockshift);
    141 	}
    142 	return (err);
    143 }
    144 
    145 int
    146 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
    147 {
    148 	uint64_t blk, off;
    149 	blkptr_t *bparray;
    150 	int err;
    151 
    152 	mutex_enter(&bpl->bpl_lock);
    153 
    154 	err = bplist_hold(bpl);
    155 	if (err) {
    156 		mutex_exit(&bpl->bpl_lock);
    157 		return (err);
    158 	}
    159 
    160 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
    161 		mutex_exit(&bpl->bpl_lock);
    162 		return (ENOENT);
    163 	}
    164 
    165 	blk = *itorp >> bpl->bpl_bpshift;
    166 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
    167 
    168 	err = bplist_cache(bpl, blk);
    169 	if (err) {
    170 		mutex_exit(&bpl->bpl_lock);
    171 		return (err);
    172 	}
    173 
    174 	bparray = bpl->bpl_cached_dbuf->db_data;
    175 	*bp = bparray[off];
    176 	(*itorp)++;
    177 	mutex_exit(&bpl->bpl_lock);
    178 	return (0);
    179 }
    180 
    181 int
    182 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
    183 {
    184 	uint64_t blk, off;
    185 	blkptr_t *bparray;
    186 	int err;
    187 
    188 	ASSERT(!BP_IS_HOLE(bp));
    189 	mutex_enter(&bpl->bpl_lock);
    190 	err = bplist_hold(bpl);
    191 	if (err)
    192 		return (err);
    193 
    194 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
    195 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
    196 
    197 	err = bplist_cache(bpl, blk);
    198 	if (err) {
    199 		mutex_exit(&bpl->bpl_lock);
    200 		return (err);
    201 	}
    202 
    203 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
    204 	bparray = bpl->bpl_cached_dbuf->db_data;
    205 	bparray[off] = *bp;
    206 
    207 	/* We never need the fill count. */
    208 	bparray[off].blk_fill = 0;
    209 
    210 	/* The bplist will compress better if we can leave off the checksum */
    211 	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
    212 
    213 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    214 	bpl->bpl_phys->bpl_entries++;
    215 	bpl->bpl_phys->bpl_bytes +=
    216 	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
    217 	if (bpl->bpl_havecomp) {
    218 		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
    219 		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
    220 	}
    221 	mutex_exit(&bpl->bpl_lock);
    222 
    223 	return (0);
    224 }
    225 
    226 /*
    227  * Deferred entry; will be written later by bplist_sync().
    228  */
    229 void
    230 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
    231 {
    232 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
    233 
    234 	ASSERT(!BP_IS_HOLE(bp));
    235 	mutex_enter(&bpl->bpl_lock);
    236 	bpq->bpq_blk = *bp;
    237 	bpq->bpq_next = bpl->bpl_queue;
    238 	bpl->bpl_queue = bpq;
    239 	mutex_exit(&bpl->bpl_lock);
    240 }
    241 
    242 void
    243 bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
    244 {
    245 	bplist_q_t *bpq;
    246 
    247 	mutex_enter(&bpl->bpl_lock);
    248 	while ((bpq = bpl->bpl_queue) != NULL) {
    249 		bpl->bpl_queue = bpq->bpq_next;
    250 		mutex_exit(&bpl->bpl_lock);
    251 		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
    252 		kmem_free(bpq, sizeof (*bpq));
    253 		mutex_enter(&bpl->bpl_lock);
    254 	}
    255 	mutex_exit(&bpl->bpl_lock);
    256 }
    257 
    258 void
    259 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
    260 {
    261 	mutex_enter(&bpl->bpl_lock);
    262 	ASSERT3P(bpl->bpl_queue, ==, NULL);
    263 	VERIFY(0 == bplist_hold(bpl));
    264 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    265 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
    266 	    bpl->bpl_object, 0, -1ULL, tx));
    267 	bpl->bpl_phys->bpl_entries = 0;
    268 	bpl->bpl_phys->bpl_bytes = 0;
    269 	if (bpl->bpl_havecomp) {
    270 		bpl->bpl_phys->bpl_comp = 0;
    271 		bpl->bpl_phys->bpl_uncomp = 0;
    272 	}
    273 	mutex_exit(&bpl->bpl_lock);
    274 }
    275 
    276 int
    277 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
    278 {
    279 	int err;
    280 
    281 	mutex_enter(&bpl->bpl_lock);
    282 
    283 	err = bplist_hold(bpl);
    284 	if (err) {
    285 		mutex_exit(&bpl->bpl_lock);
    286 		return (err);
    287 	}
    288 
    289 	*usedp = bpl->bpl_phys->bpl_bytes;
    290 	if (bpl->bpl_havecomp) {
    291 		*compp = bpl->bpl_phys->bpl_comp;
    292 		*uncompp = bpl->bpl_phys->bpl_uncomp;
    293 	}
    294 	mutex_exit(&bpl->bpl_lock);
    295 
    296 	if (!bpl->bpl_havecomp) {
    297 		uint64_t itor = 0, comp = 0, uncomp = 0;
    298 		blkptr_t bp;
    299 
    300 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    301 			comp += BP_GET_PSIZE(&bp);
    302 			uncomp += BP_GET_UCSIZE(&bp);
    303 		}
    304 		if (err == ENOENT)
    305 			err = 0;
    306 		*compp = comp;
    307 		*uncompp = uncomp;
    308 	}
    309 
    310 	return (err);
    311 }
    312 
    313 /*
    314  * Return (in *dasizep) the amount of space on the deadlist which is:
    315  * mintxg < blk_birth <= maxtxg
    316  */
    317 int
    318 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    319     uint64_t *dasizep)
    320 {
    321 	uint64_t size = 0;
    322 	uint64_t itor = 0;
    323 	blkptr_t bp;
    324 	int err;
    325 
    326 	/*
    327 	 * As an optimization, if they want the whole txg range, just
    328 	 * get bpl_bytes rather than iterating over the bps.
    329 	 */
    330 	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
    331 		mutex_enter(&bpl->bpl_lock);
    332 		err = bplist_hold(bpl);
    333 		if (err == 0)
    334 			*dasizep = bpl->bpl_phys->bpl_bytes;
    335 		mutex_exit(&bpl->bpl_lock);
    336 		return (err);
    337 	}
    338 
    339 	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    340 		if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
    341 			size +=
    342 			    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp);
    343 		}
    344 	}
    345 	if (err == ENOENT)
    346 		err = 0;
    347 	*dasizep = size;
    348 	return (err);
    349 }
    350