Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22  10922      Jeff  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens #include <sys/bplist.h>
     27    789    ahrens #include <sys/zfs_context.h>
     28  10922      Jeff 
     29  10922      Jeff void
     30  10922      Jeff bplist_init(bplist_t *bpl)
     31  10922      Jeff {
     32  10922      Jeff 	bzero(bpl, sizeof (*bpl));
     33  10922      Jeff 	mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
     34  10922      Jeff }
     35  10922      Jeff 
     36  10922      Jeff void
     37  10922      Jeff bplist_fini(bplist_t *bpl)
     38  10922      Jeff {
     39  10922      Jeff 	ASSERT(bpl->bpl_queue == NULL);
     40  10922      Jeff 	mutex_destroy(&bpl->bpl_lock);
     41  10922      Jeff }
     42    789    ahrens 
     43   1544  eschrock static int
     44    789    ahrens bplist_hold(bplist_t *bpl)
     45    789    ahrens {
     46    789    ahrens 	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
     47    789    ahrens 	if (bpl->bpl_dbuf == NULL) {
     48   1544  eschrock 		int err = dmu_bonus_hold(bpl->bpl_mos,
     49   1544  eschrock 		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
     50   1544  eschrock 		if (err)
     51   1544  eschrock 			return (err);
     52    789    ahrens 		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
     53    789    ahrens 	}
     54   1544  eschrock 	return (0);
     55    789    ahrens }
     56    789    ahrens 
     57    789    ahrens uint64_t
     58    789    ahrens bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
     59    789    ahrens {
     60   2082  eschrock 	int size;
     61    789    ahrens 
     62   4577    ahrens 	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
     63   2082  eschrock 	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
     64    789    ahrens 
     65   2082  eschrock 	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
     66   2082  eschrock 	    DMU_OT_BPLIST_HDR, size, tx));
     67    789    ahrens }
     68    789    ahrens 
     69    789    ahrens void
     70    789    ahrens bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
     71    789    ahrens {
     72    789    ahrens 	VERIFY(dmu_object_free(mos, object, tx) == 0);
     73    789    ahrens }
     74    789    ahrens 
     75   1544  eschrock int
     76    789    ahrens bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
     77    789    ahrens {
     78    789    ahrens 	dmu_object_info_t doi;
     79   1544  eschrock 	int err;
     80    789    ahrens 
     81   1544  eschrock 	err = dmu_object_info(mos, object, &doi);
     82   1544  eschrock 	if (err)
     83   1544  eschrock 		return (err);
     84    789    ahrens 
     85    789    ahrens 	mutex_enter(&bpl->bpl_lock);
     86    789    ahrens 
     87    789    ahrens 	ASSERT(bpl->bpl_dbuf == NULL);
     88    789    ahrens 	ASSERT(bpl->bpl_phys == NULL);
     89    789    ahrens 	ASSERT(bpl->bpl_cached_dbuf == NULL);
     90    789    ahrens 	ASSERT(bpl->bpl_queue == NULL);
     91    789    ahrens 	ASSERT(object != 0);
     92   2082  eschrock 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
     93   2082  eschrock 	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
     94    789    ahrens 
     95    789    ahrens 	bpl->bpl_mos = mos;
     96    789    ahrens 	bpl->bpl_object = object;
     97    789    ahrens 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
     98    789    ahrens 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
     99   2082  eschrock 	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
    100    789    ahrens 
    101    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    102   1544  eschrock 	return (0);
    103    789    ahrens }
    104    789    ahrens 
    105    789    ahrens void
    106    789    ahrens bplist_close(bplist_t *bpl)
    107    789    ahrens {
    108    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    109    789    ahrens 
    110    789    ahrens 	ASSERT(bpl->bpl_queue == NULL);
    111    789    ahrens 
    112    789    ahrens 	if (bpl->bpl_cached_dbuf) {
    113   1544  eschrock 		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    114    789    ahrens 		bpl->bpl_cached_dbuf = NULL;
    115    789    ahrens 	}
    116    789    ahrens 	if (bpl->bpl_dbuf) {
    117   1544  eschrock 		dmu_buf_rele(bpl->bpl_dbuf, bpl);
    118    789    ahrens 		bpl->bpl_dbuf = NULL;
    119    789    ahrens 		bpl->bpl_phys = NULL;
    120    789    ahrens 	}
    121    789    ahrens 
    122    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    123    789    ahrens }
    124    789    ahrens 
    125    789    ahrens boolean_t
    126    789    ahrens bplist_empty(bplist_t *bpl)
    127    789    ahrens {
    128    789    ahrens 	boolean_t rv;
    129    789    ahrens 
    130    789    ahrens 	if (bpl->bpl_object == 0)
    131    789    ahrens 		return (B_TRUE);
    132    789    ahrens 
    133    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    134   1544  eschrock 	VERIFY(0 == bplist_hold(bpl)); /* XXX */
    135    789    ahrens 	rv = (bpl->bpl_phys->bpl_entries == 0);
    136    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    137    789    ahrens 
    138    789    ahrens 	return (rv);
    139   1544  eschrock }
    140   1544  eschrock 
    141   1544  eschrock static int
    142   1544  eschrock bplist_cache(bplist_t *bpl, uint64_t blkid)
    143   1544  eschrock {
    144   1544  eschrock 	int err = 0;
    145   1544  eschrock 
    146   1544  eschrock 	if (bpl->bpl_cached_dbuf == NULL ||
    147   1544  eschrock 	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
    148   1544  eschrock 		if (bpl->bpl_cached_dbuf != NULL)
    149   1544  eschrock 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
    150   1544  eschrock 		err = dmu_buf_hold(bpl->bpl_mos,
    151   1544  eschrock 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
    152   1544  eschrock 		    bpl, &bpl->bpl_cached_dbuf);
    153   1544  eschrock 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
    154   1544  eschrock 		    1ULL << bpl->bpl_blockshift);
    155   1544  eschrock 	}
    156   1544  eschrock 	return (err);
    157    789    ahrens }
    158    789    ahrens 
    159    789    ahrens int
    160    789    ahrens bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
    161    789    ahrens {
    162    789    ahrens 	uint64_t blk, off;
    163    789    ahrens 	blkptr_t *bparray;
    164   1544  eschrock 	int err;
    165    789    ahrens 
    166    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    167   1544  eschrock 
    168   1544  eschrock 	err = bplist_hold(bpl);
    169   1544  eschrock 	if (err) {
    170   1544  eschrock 		mutex_exit(&bpl->bpl_lock);
    171   1544  eschrock 		return (err);
    172   1544  eschrock 	}
    173    789    ahrens 
    174    789    ahrens 	if (*itorp >= bpl->bpl_phys->bpl_entries) {
    175    789    ahrens 		mutex_exit(&bpl->bpl_lock);
    176    789    ahrens 		return (ENOENT);
    177    789    ahrens 	}
    178    789    ahrens 
    179    789    ahrens 	blk = *itorp >> bpl->bpl_bpshift;
    180    789    ahrens 	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
    181    789    ahrens 
    182   1544  eschrock 	err = bplist_cache(bpl, blk);
    183   1544  eschrock 	if (err) {
    184   1544  eschrock 		mutex_exit(&bpl->bpl_lock);
    185   1544  eschrock 		return (err);
    186    789    ahrens 	}
    187    789    ahrens 
    188   1544  eschrock 	bparray = bpl->bpl_cached_dbuf->db_data;
    189    789    ahrens 	*bp = bparray[off];
    190    789    ahrens 	(*itorp)++;
    191    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    192    789    ahrens 	return (0);
    193    789    ahrens }
    194    789    ahrens 
    195   1544  eschrock int
    196   7046    ahrens bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
    197    789    ahrens {
    198    789    ahrens 	uint64_t blk, off;
    199    789    ahrens 	blkptr_t *bparray;
    200   1544  eschrock 	int err;
    201    789    ahrens 
    202    789    ahrens 	ASSERT(!BP_IS_HOLE(bp));
    203    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    204   1544  eschrock 	err = bplist_hold(bpl);
    205   1544  eschrock 	if (err)
    206   1544  eschrock 		return (err);
    207    789    ahrens 
    208    789    ahrens 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
    209    789    ahrens 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
    210    789    ahrens 
    211   1544  eschrock 	err = bplist_cache(bpl, blk);
    212   1544  eschrock 	if (err) {
    213   1544  eschrock 		mutex_exit(&bpl->bpl_lock);
    214   1544  eschrock 		return (err);
    215    789    ahrens 	}
    216    789    ahrens 
    217   1544  eschrock 	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
    218   1544  eschrock 	bparray = bpl->bpl_cached_dbuf->db_data;
    219    789    ahrens 	bparray[off] = *bp;
    220    789    ahrens 
    221    789    ahrens 	/* We never need the fill count. */
    222    789    ahrens 	bparray[off].blk_fill = 0;
    223    789    ahrens 
    224    789    ahrens 	/* The bplist will compress better if we can leave off the checksum */
    225  10922      Jeff 	if (!BP_GET_DEDUP(&bparray[off]))
    226  10922      Jeff 		bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
    227    789    ahrens 
    228    789    ahrens 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    229    789    ahrens 	bpl->bpl_phys->bpl_entries++;
    230   2082  eschrock 	bpl->bpl_phys->bpl_bytes +=
    231  10922      Jeff 	    bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
    232   2082  eschrock 	if (bpl->bpl_havecomp) {
    233   2082  eschrock 		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
    234   2082  eschrock 		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
    235   2082  eschrock 	}
    236    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    237   1544  eschrock 
    238   1544  eschrock 	return (0);
    239    789    ahrens }
    240    789    ahrens 
    241  10922      Jeff void
    242  10922      Jeff bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx)
    243  10922      Jeff {
    244  10922      Jeff 	VERIFY(bplist_enqueue(bpl, bp, tx) == 0);
    245  10922      Jeff }
    246  10922      Jeff 
    247    789    ahrens /*
    248  10922      Jeff  * Deferred entry; will be processed later by bplist_sync().
    249    789    ahrens  */
    250    789    ahrens void
    251   7046    ahrens bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
    252    789    ahrens {
    253    789    ahrens 	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
    254    789    ahrens 
    255    789    ahrens 	ASSERT(!BP_IS_HOLE(bp));
    256    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    257    789    ahrens 	bpq->bpq_blk = *bp;
    258    789    ahrens 	bpq->bpq_next = bpl->bpl_queue;
    259    789    ahrens 	bpl->bpl_queue = bpq;
    260    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    261    789    ahrens }
    262    789    ahrens 
    263    789    ahrens void
    264  10922      Jeff bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx)
    265    789    ahrens {
    266    789    ahrens 	bplist_q_t *bpq;
    267    789    ahrens 
    268    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    269    789    ahrens 	while ((bpq = bpl->bpl_queue) != NULL) {
    270    789    ahrens 		bpl->bpl_queue = bpq->bpq_next;
    271    789    ahrens 		mutex_exit(&bpl->bpl_lock);
    272  10922      Jeff 		func(arg, &bpq->bpq_blk, tx);
    273    789    ahrens 		kmem_free(bpq, sizeof (*bpq));
    274    789    ahrens 		mutex_enter(&bpl->bpl_lock);
    275    789    ahrens 	}
    276    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    277    789    ahrens }
    278    789    ahrens 
    279    789    ahrens void
    280    789    ahrens bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
    281    789    ahrens {
    282    789    ahrens 	mutex_enter(&bpl->bpl_lock);
    283    789    ahrens 	ASSERT3P(bpl->bpl_queue, ==, NULL);
    284   1544  eschrock 	VERIFY(0 == bplist_hold(bpl));
    285    789    ahrens 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
    286   1544  eschrock 	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
    287   1544  eschrock 	    bpl->bpl_object, 0, -1ULL, tx));
    288    789    ahrens 	bpl->bpl_phys->bpl_entries = 0;
    289    789    ahrens 	bpl->bpl_phys->bpl_bytes = 0;
    290   2082  eschrock 	if (bpl->bpl_havecomp) {
    291   2082  eschrock 		bpl->bpl_phys->bpl_comp = 0;
    292   2082  eschrock 		bpl->bpl_phys->bpl_uncomp = 0;
    293   2082  eschrock 	}
    294    789    ahrens 	mutex_exit(&bpl->bpl_lock);
    295    789    ahrens }
    296   2082  eschrock 
    297   2082  eschrock int
    298   2082  eschrock bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
    299   2082  eschrock {
    300   2082  eschrock 	int err;
    301   2082  eschrock 
    302   2082  eschrock 	mutex_enter(&bpl->bpl_lock);
    303   2082  eschrock 
    304   2082  eschrock 	err = bplist_hold(bpl);
    305   2082  eschrock 	if (err) {
    306   2082  eschrock 		mutex_exit(&bpl->bpl_lock);
    307   2082  eschrock 		return (err);
    308   2082  eschrock 	}
    309   2082  eschrock 
    310   2082  eschrock 	*usedp = bpl->bpl_phys->bpl_bytes;
    311   2082  eschrock 	if (bpl->bpl_havecomp) {
    312   2082  eschrock 		*compp = bpl->bpl_phys->bpl_comp;
    313   2082  eschrock 		*uncompp = bpl->bpl_phys->bpl_uncomp;
    314   2082  eschrock 	}
    315   2082  eschrock 	mutex_exit(&bpl->bpl_lock);
    316   2082  eschrock 
    317   2082  eschrock 	if (!bpl->bpl_havecomp) {
    318   5367    ahrens 		uint64_t itor = 0, comp = 0, uncomp = 0;
    319   5367    ahrens 		blkptr_t bp;
    320   5367    ahrens 
    321   2082  eschrock 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    322   2082  eschrock 			comp += BP_GET_PSIZE(&bp);
    323   2082  eschrock 			uncomp += BP_GET_UCSIZE(&bp);
    324   2082  eschrock 		}
    325   2082  eschrock 		if (err == ENOENT)
    326   2082  eschrock 			err = 0;
    327   2082  eschrock 		*compp = comp;
    328   2082  eschrock 		*uncompp = uncomp;
    329   2082  eschrock 	}
    330   2082  eschrock 
    331   2082  eschrock 	return (err);
    332   2082  eschrock }
    333   7390   Matthew 
    334   7390   Matthew /*
    335  10922      Jeff  * Return (in *dsizep) the amount of space on the deadlist which is:
    336   7390   Matthew  * mintxg < blk_birth <= maxtxg
    337   7390   Matthew  */
    338   7390   Matthew int
    339   7390   Matthew bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    340  10922      Jeff     uint64_t *dsizep)
    341   7390   Matthew {
    342   7390   Matthew 	uint64_t size = 0;
    343   7390   Matthew 	uint64_t itor = 0;
    344   7390   Matthew 	blkptr_t bp;
    345   7390   Matthew 	int err;
    346   7390   Matthew 
    347   7390   Matthew 	/*
    348   7390   Matthew 	 * As an optimization, if they want the whole txg range, just
    349   7390   Matthew 	 * get bpl_bytes rather than iterating over the bps.
    350   7390   Matthew 	 */
    351   7390   Matthew 	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
    352   7390   Matthew 		mutex_enter(&bpl->bpl_lock);
    353   7390   Matthew 		err = bplist_hold(bpl);
    354   7390   Matthew 		if (err == 0)
    355  10922      Jeff 			*dsizep = bpl->bpl_phys->bpl_bytes;
    356   7390   Matthew 		mutex_exit(&bpl->bpl_lock);
    357   7390   Matthew 		return (err);
    358   7390   Matthew 	}
    359   7390   Matthew 
    360   7390   Matthew 	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
    361   7390   Matthew 		if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
    362  10922      Jeff 			size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp);
    363   7390   Matthew 		}
    364   7390   Matthew 	}
    365   7390   Matthew 	if (err == ENOENT)
    366   7390   Matthew 		err = 0;
    367  10922      Jeff 	*dsizep = size;
    368   7390   Matthew 	return (err);
    369   7390   Matthew }
    370