Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22   9234    George  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens #include <sys/zfs_context.h>
     27    789    ahrens #include <sys/spa_impl.h>
     28    789    ahrens #include <sys/zio.h>
     29    789    ahrens #include <sys/zio_checksum.h>
     30    789    ahrens #include <sys/zio_compress.h>
     31    789    ahrens #include <sys/dmu.h>
     32    789    ahrens #include <sys/dmu_tx.h>
     33    789    ahrens #include <sys/zap.h>
     34    789    ahrens #include <sys/zil.h>
     35    789    ahrens #include <sys/vdev_impl.h>
     36    789    ahrens #include <sys/metaslab.h>
     37    789    ahrens #include <sys/uberblock_impl.h>
     38    789    ahrens #include <sys/txg.h>
     39    789    ahrens #include <sys/avl.h>
     40    789    ahrens #include <sys/unique.h>
     41    789    ahrens #include <sys/dsl_pool.h>
     42    789    ahrens #include <sys/dsl_dir.h>
     43    789    ahrens #include <sys/dsl_prop.h>
     44    789    ahrens #include <sys/fs/zfs.h>
     45   4620    perrin #include <sys/metaslab_impl.h>
     46   7754      Jeff #include <sys/arc.h>
     47  10956    George #include <sys/ddt.h>
     48   4787    ahrens #include "zfs_prop.h"
     49    789    ahrens 
     50    789    ahrens /*
     51    789    ahrens  * SPA locking
     52    789    ahrens  *
     53    789    ahrens  * There are three basic locks for managing spa_t structures:
     54    789    ahrens  *
     55    789    ahrens  * spa_namespace_lock (global mutex)
     56    789    ahrens  *
     57   1775     billm  *	This lock must be acquired to do any of the following:
     58    789    ahrens  *
     59   1775     billm  *		- Lookup a spa_t by name
     60   1775     billm  *		- Add or remove a spa_t from the namespace
     61   1775     billm  *		- Increase spa_refcount from non-zero
     62   1775     billm  *		- Check if spa_refcount is zero
     63   1775     billm  *		- Rename a spa_t
     64   1544  eschrock  *		- add/remove/attach/detach devices
     65   1775     billm  *		- Held for the duration of create/destroy/import/export
     66    789    ahrens  *
     67   1775     billm  *	It does not need to handle recursion.  A create or destroy may
     68   1775     billm  *	reference objects (files or zvols) in other pools, but by
     69   1775     billm  *	definition they must have an existing reference, and will never need
     70   1775     billm  *	to lookup a spa_t by name.
     71    789    ahrens  *
     72    789    ahrens  * spa_refcount (per-spa refcount_t protected by mutex)
     73    789    ahrens  *
     74   1775     billm  *	This reference count keeps track of any active users of the spa_t.  The
     75   1775     billm  *	spa_t cannot be destroyed or freed while this is non-zero.  Internally,
     76   1775     billm  *	the refcount is never really 'zero' - opening a pool implicitly keeps
     77   7046    ahrens  *	some references in the DMU.  Internally we check against spa_minref, but
     78   1775     billm  *	present the image of a zero/non-zero value to consumers.
     79    789    ahrens  *
     80   7754      Jeff  * spa_config_lock[] (per-spa array of rwlocks)
     81    789    ahrens  *
     82   4787    ahrens  *	This protects the spa_t from config changes, and must be held in
     83   4787    ahrens  *	the following circumstances:
     84    789    ahrens  *
     85   1775     billm  *		- RW_READER to perform I/O to the spa
     86   1775     billm  *		- RW_WRITER to change the vdev config
     87    789    ahrens  *
     88    789    ahrens  * The locking order is fairly straightforward:
     89    789    ahrens  *
     90   1775     billm  *		spa_namespace_lock	->	spa_refcount
     91    789    ahrens  *
     92   1775     billm  *	The namespace lock must be acquired to increase the refcount from 0
     93   1775     billm  *	or to check if it is zero.
     94    789    ahrens  *
     95   7754      Jeff  *		spa_refcount		->	spa_config_lock[]
     96    789    ahrens  *
     97   1775     billm  *	There must be at least one valid reference on the spa_t to acquire
     98   1775     billm  *	the config lock.
     99    789    ahrens  *
    100   7754      Jeff  *		spa_namespace_lock	->	spa_config_lock[]
    101    789    ahrens  *
    102   1775     billm  *	The namespace lock must always be taken before the config lock.
    103    789    ahrens  *
    104    789    ahrens  *
    105   7754      Jeff  * The spa_namespace_lock can be acquired directly and is globally visible.
    106    789    ahrens  *
    107   7754      Jeff  * The namespace is manipulated using the following functions, all of which
    108   7754      Jeff  * require the spa_namespace_lock to be held.
    109    789    ahrens  *
    110   1775     billm  *	spa_lookup()		Lookup a spa_t by name.
    111    789    ahrens  *
    112   1775     billm  *	spa_add()		Create a new spa_t in the namespace.
    113    789    ahrens  *
    114   1775     billm  *	spa_remove()		Remove a spa_t from the namespace.  This also
    115   1775     billm  *				frees up any memory associated with the spa_t.
    116    789    ahrens  *
    117   1775     billm  *	spa_next()		Returns the next spa_t in the system, or the
    118   1775     billm  *				first if NULL is passed.
    119    789    ahrens  *
    120   1775     billm  *	spa_evict_all()		Shutdown and remove all spa_t structures in
    121   1775     billm  *				the system.
    122    789    ahrens  *
    123   1544  eschrock  *	spa_guid_exists()	Determine whether a pool/device guid exists.
    124    789    ahrens  *
    125    789    ahrens  * The spa_refcount is manipulated using the following functions:
    126    789    ahrens  *
    127   1775     billm  *	spa_open_ref()		Adds a reference to the given spa_t.  Must be
    128   1775     billm  *				called with spa_namespace_lock held if the
    129   1775     billm  *				refcount is currently zero.
    130    789    ahrens  *
    131   1775     billm  *	spa_close()		Remove a reference from the spa_t.  This will
    132   1775     billm  *				not free the spa_t or remove it from the
    133   1775     billm  *				namespace.  No locking is required.
    134    789    ahrens  *
    135   1775     billm  *	spa_refcount_zero()	Returns true if the refcount is currently
    136   1775     billm  *				zero.  Must be called with spa_namespace_lock
    137   1775     billm  *				held.
    138    789    ahrens  *
    139   7754      Jeff  * The spa_config_lock[] is an array of rwlocks, ordered as follows:
    140   7754      Jeff  * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV.
    141   7754      Jeff  * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}().
    142   7754      Jeff  *
    143   7754      Jeff  * To read the configuration, it suffices to hold one of these locks as reader.
    144   7754      Jeff  * To modify the configuration, you must hold all locks as writer.  To modify
    145   7754      Jeff  * vdev state without altering the vdev tree's topology (e.g. online/offline),
    146   7754      Jeff  * you must hold SCL_STATE and SCL_ZIO as writer.
    147   7754      Jeff  *
    148   7754      Jeff  * We use these distinct config locks to avoid recursive lock entry.
    149   7754      Jeff  * For example, spa_sync() (which holds SCL_CONFIG as reader) induces
    150   7754      Jeff  * block allocations (SCL_ALLOC), which may require reading space maps
    151   7754      Jeff  * from disk (dmu_read() -> zio_read() -> SCL_ZIO).
    152   7754      Jeff  *
    153   7754      Jeff  * The spa config locks cannot be normal rwlocks because we need the
    154   7754      Jeff  * ability to hand off ownership.  For example, SCL_ZIO is acquired
    155   7754      Jeff  * by the issuing thread and later released by an interrupt thread.
    156   7754      Jeff  * They do, however, obey the usual write-wanted semantics to prevent
    157   7754      Jeff  * writer (i.e. system administrator) starvation.
    158   7754      Jeff  *
    159   7754      Jeff  * The lock acquisition rules are as follows:
    160   7754      Jeff  *
    161   7754      Jeff  * SCL_CONFIG
    162   7754      Jeff  *	Protects changes to the vdev tree topology, such as vdev
    163   7754      Jeff  *	add/remove/attach/detach.  Protects the dirty config list
    164   7754      Jeff  *	(spa_config_dirty_list) and the set of spares and l2arc devices.
    165   7754      Jeff  *
    166   7754      Jeff  * SCL_STATE
    167   7754      Jeff  *	Protects changes to pool state and vdev state, such as vdev
    168   7754      Jeff  *	online/offline/fault/degrade/clear.  Protects the dirty state list
    169   7754      Jeff  *	(spa_state_dirty_list) and global pool state (spa_state).
    170   7754      Jeff  *
    171   7754      Jeff  * SCL_ALLOC
    172   7754      Jeff  *	Protects changes to metaslab groups and classes.
    173   7754      Jeff  *	Held as reader by metaslab_alloc() and metaslab_claim().
    174   7754      Jeff  *
    175   7754      Jeff  * SCL_ZIO
    176   7754      Jeff  *	Held by bp-level zios (those which have no io_vd upon entry)
    177   7754      Jeff  *	to prevent changes to the vdev tree.  The bp-level zio implicitly
    178   7754      Jeff  *	protects all of its vdev child zios, which do not hold SCL_ZIO.
    179   7754      Jeff  *
    180   7754      Jeff  * SCL_FREE
    181   7754      Jeff  *	Protects changes to metaslab groups and classes.
    182   7754      Jeff  *	Held as reader by metaslab_free().  SCL_FREE is distinct from
    183   7754      Jeff  *	SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free
    184   7754      Jeff  *	blocks in zio_done() while another i/o that holds either
    185   7754      Jeff  *	SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete.
    186   7754      Jeff  *
    187   7754      Jeff  * SCL_VDEV
    188   7754      Jeff  *	Held as reader to prevent changes to the vdev tree during trivial
    189  10922      Jeff  *	inquiries such as bp_get_dsize().  SCL_VDEV is distinct from the
    190   7754      Jeff  *	other locks, and lower than all of them, to ensure that it's safe
    191   7754      Jeff  *	to acquire regardless of caller context.
    192   7754      Jeff  *
    193   7754      Jeff  * In addition, the following rules apply:
    194   7754      Jeff  *
    195   7754      Jeff  * (a)	spa_props_lock protects pool properties, spa_config and spa_config_list.
    196   7754      Jeff  *	The lock ordering is SCL_CONFIG > spa_props_lock.
    197   7754      Jeff  *
    198   7754      Jeff  * (b)	I/O operations on leaf vdevs.  For any zio operation that takes
    199   7754      Jeff  *	an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(),
    200   7754      Jeff  *	or zio_write_phys() -- the caller must ensure that the config
    201   7754      Jeff  *	cannot change in the interim, and that the vdev cannot be reopened.
    202   7754      Jeff  *	SCL_STATE as reader suffices for both.
    203    789    ahrens  *
    204   1544  eschrock  * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
    205    789    ahrens  *
    206   1775     billm  *	spa_vdev_enter()	Acquire the namespace lock and the config lock
    207   1544  eschrock  *				for writing.
    208    789    ahrens  *
    209   1775     billm  *	spa_vdev_exit()		Release the config lock, wait for all I/O
    210   1775     billm  *				to complete, sync the updated configs to the
    211   1544  eschrock  *				cache, and release the namespace lock.
    212    789    ahrens  *
    213   7754      Jeff  * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit().
    214   7754      Jeff  * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
    215   7754      Jeff  * locking is, always, based on spa_namespace_lock and spa_config_lock[].
    216   7754      Jeff  *
    217   7754      Jeff  * spa_rename() is also implemented within this file since it requires
    218   7754      Jeff  * manipulation of the namespace.
    219    789    ahrens  */
    220    789    ahrens 
    221    789    ahrens static avl_tree_t spa_namespace_avl;
    222    789    ahrens kmutex_t spa_namespace_lock;
    223    789    ahrens static kcondvar_t spa_namespace_cv;
    224   1635   bonwick static int spa_active_count;
    225   2986  ek110237 int spa_max_replication_override = SPA_DVAS_PER_BP;
    226   2082  eschrock 
    227   3377  eschrock static kmutex_t spa_spare_lock;
    228   2082  eschrock static avl_tree_t spa_spare_avl;
    229   5450   brendan static kmutex_t spa_l2cache_lock;
    230   5450   brendan static avl_tree_t spa_l2cache_avl;
    231    789    ahrens 
    232    789    ahrens kmem_cache_t *spa_buffer_pool;
    233   8241      Jeff int spa_mode_global;
    234    789    ahrens 
    235    789    ahrens #ifdef ZFS_DEBUG
    236   4603    ahrens /* Everything except dprintf is on by default in debug builds */
    237   4603    ahrens int zfs_flags = ~ZFS_DEBUG_DPRINTF;
    238    789    ahrens #else
    239    789    ahrens int zfs_flags = 0;
    240    789    ahrens #endif
    241   3713    ahrens 
    242   3713    ahrens /*
    243   3713    ahrens  * zfs_recover can be set to nonzero to attempt to recover from
    244   3713    ahrens  * otherwise-fatal errors, typically caused by on-disk corruption.  When
    245   3713    ahrens  * set, calls to zfs_panic_recover() will turn into warning messages.
    246   3713    ahrens  */
    247   3713    ahrens int zfs_recover = 0;
    248    789    ahrens 
    249   5530   bonwick 
    250   5530   bonwick /*
    251   5530   bonwick  * ==========================================================================
    252   5530   bonwick  * SPA config locking
    253   5530   bonwick  * ==========================================================================
    254   5530   bonwick  */
    255   5530   bonwick static void
    256   7754      Jeff spa_config_lock_init(spa_t *spa)
    257   5530   bonwick {
    258   7754      Jeff 	for (int i = 0; i < SCL_LOCKS; i++) {
    259   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    260   7754      Jeff 		mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
    261   7754      Jeff 		cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
    262   7754      Jeff 		refcount_create(&scl->scl_count);
    263   7754      Jeff 		scl->scl_writer = NULL;
    264   7754      Jeff 		scl->scl_write_wanted = 0;
    265   7754      Jeff 	}
    266   5530   bonwick }
    267   5530   bonwick 
    268   5530   bonwick static void
    269   7754      Jeff spa_config_lock_destroy(spa_t *spa)
    270   5530   bonwick {
    271   7754      Jeff 	for (int i = 0; i < SCL_LOCKS; i++) {
    272   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    273   7754      Jeff 		mutex_destroy(&scl->scl_lock);
    274   7754      Jeff 		cv_destroy(&scl->scl_cv);
    275   7754      Jeff 		refcount_destroy(&scl->scl_count);
    276   7754      Jeff 		ASSERT(scl->scl_writer == NULL);
    277   7754      Jeff 		ASSERT(scl->scl_write_wanted == 0);
    278   7754      Jeff 	}
    279   7754      Jeff }
    280   7754      Jeff 
    281   7754      Jeff int
    282   7754      Jeff spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
    283   7754      Jeff {
    284   7754      Jeff 	for (int i = 0; i < SCL_LOCKS; i++) {
    285   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    286   7754      Jeff 		if (!(locks & (1 << i)))
    287   7754      Jeff 			continue;
    288   7754      Jeff 		mutex_enter(&scl->scl_lock);
    289   7754      Jeff 		if (rw == RW_READER) {
    290   7754      Jeff 			if (scl->scl_writer || scl->scl_write_wanted) {
    291   7754      Jeff 				mutex_exit(&scl->scl_lock);
    292   7754      Jeff 				spa_config_exit(spa, locks ^ (1 << i), tag);
    293   7754      Jeff 				return (0);
    294   7754      Jeff 			}
    295   7754      Jeff 		} else {
    296   7754      Jeff 			ASSERT(scl->scl_writer != curthread);
    297   7754      Jeff 			if (!refcount_is_zero(&scl->scl_count)) {
    298   7754      Jeff 				mutex_exit(&scl->scl_lock);
    299   7754      Jeff 				spa_config_exit(spa, locks ^ (1 << i), tag);
    300   7754      Jeff 				return (0);
    301   7754      Jeff 			}
    302   7754      Jeff 			scl->scl_writer = curthread;
    303   7754      Jeff 		}
    304   7754      Jeff 		(void) refcount_add(&scl->scl_count, tag);
    305   7754      Jeff 		mutex_exit(&scl->scl_lock);
    306   7754      Jeff 	}
    307   7754      Jeff 	return (1);
    308   5530   bonwick }
    309   5530   bonwick 
    310   5530   bonwick void
    311   7754      Jeff spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
    312   5530   bonwick {
    313   9846      Eric 	int wlocks_held = 0;
    314   9846      Eric 
    315   7754      Jeff 	for (int i = 0; i < SCL_LOCKS; i++) {
    316   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    317   9846      Eric 		if (scl->scl_writer == curthread)
    318   9846      Eric 			wlocks_held |= (1 << i);
    319   7754      Jeff 		if (!(locks & (1 << i)))
    320   7754      Jeff 			continue;
    321   7754      Jeff 		mutex_enter(&scl->scl_lock);
    322   7754      Jeff 		if (rw == RW_READER) {
    323   7754      Jeff 			while (scl->scl_writer || scl->scl_write_wanted) {
    324   7754      Jeff 				cv_wait(&scl->scl_cv, &scl->scl_lock);
    325   7754      Jeff 			}
    326   7754      Jeff 		} else {
    327   7754      Jeff 			ASSERT(scl->scl_writer != curthread);
    328   7754      Jeff 			while (!refcount_is_zero(&scl->scl_count)) {
    329   7754      Jeff 				scl->scl_write_wanted++;
    330   7754      Jeff 				cv_wait(&scl->scl_cv, &scl->scl_lock);
    331   7754      Jeff 				scl->scl_write_wanted--;
    332   7754      Jeff 			}
    333   7754      Jeff 			scl->scl_writer = curthread;
    334   7754      Jeff 		}
    335   7754      Jeff 		(void) refcount_add(&scl->scl_count, tag);
    336   7754      Jeff 		mutex_exit(&scl->scl_lock);
    337   5530   bonwick 	}
    338   9846      Eric 	ASSERT(wlocks_held <= locks);
    339   5530   bonwick }
    340   5530   bonwick 
    341   5530   bonwick void
    342   7754      Jeff spa_config_exit(spa_t *spa, int locks, void *tag)
    343   5530   bonwick {
    344   7754      Jeff 	for (int i = SCL_LOCKS - 1; i >= 0; i--) {
    345   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    346   7754      Jeff 		if (!(locks & (1 << i)))
    347   7754      Jeff 			continue;
    348   7754      Jeff 		mutex_enter(&scl->scl_lock);
    349   7754      Jeff 		ASSERT(!refcount_is_zero(&scl->scl_count));
    350   7754      Jeff 		if (refcount_remove(&scl->scl_count, tag) == 0) {
    351   7754      Jeff 			ASSERT(scl->scl_writer == NULL ||
    352   7754      Jeff 			    scl->scl_writer == curthread);
    353   7754      Jeff 			scl->scl_writer = NULL;	/* OK in either case */
    354   7754      Jeff 			cv_broadcast(&scl->scl_cv);
    355   7754      Jeff 		}
    356   7754      Jeff 		mutex_exit(&scl->scl_lock);
    357   7754      Jeff 	}
    358   7754      Jeff }
    359   5530   bonwick 
    360   7754      Jeff int
    361   7754      Jeff spa_config_held(spa_t *spa, int locks, krw_t rw)
    362   7754      Jeff {
    363   7754      Jeff 	int locks_held = 0;
    364   5530   bonwick 
    365   7754      Jeff 	for (int i = 0; i < SCL_LOCKS; i++) {
    366   7754      Jeff 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
    367   7754      Jeff 		if (!(locks & (1 << i)))
    368   7754      Jeff 			continue;
    369   7754      Jeff 		if ((rw == RW_READER && !refcount_is_zero(&scl->scl_count)) ||
    370   7754      Jeff 		    (rw == RW_WRITER && scl->scl_writer == curthread))
    371   7754      Jeff 			locks_held |= 1 << i;
    372   5530   bonwick 	}
    373   5530   bonwick 
    374   7754      Jeff 	return (locks_held);
    375   5530   bonwick }
    376    789    ahrens 
    377    789    ahrens /*
    378    789    ahrens  * ==========================================================================
    379    789    ahrens  * SPA namespace functions
    380    789    ahrens  * ==========================================================================
    381    789    ahrens  */
    382    789    ahrens 
    383    789    ahrens /*
    384    789    ahrens  * Lookup the named spa_t in the AVL tree.  The spa_namespace_lock must be held.
    385    789    ahrens  * Returns NULL if no matching spa_t is found.
    386    789    ahrens  */
    387    789    ahrens spa_t *
    388    789    ahrens spa_lookup(const char *name)
    389    789    ahrens {
    390   7754      Jeff 	static spa_t search;	/* spa_t is large; don't allocate on stack */
    391   7754      Jeff 	spa_t *spa;
    392    789    ahrens 	avl_index_t where;
    393   4603    ahrens 	char c;
    394   4603    ahrens 	char *cp;
    395    789    ahrens 
    396    789    ahrens 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    397    789    ahrens 
    398   4603    ahrens 	/*
    399   4603    ahrens 	 * If it's a full dataset name, figure out the pool name and
    400   4603    ahrens 	 * just use that.
    401   4603    ahrens 	 */
    402   4603    ahrens 	cp = strpbrk(name, "/@");
    403   4603    ahrens 	if (cp) {
    404   4603    ahrens 		c = *cp;
    405   4603    ahrens 		*cp = '\0';
    406   4603    ahrens 	}
    407   4603    ahrens 
    408   7754      Jeff 	(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
    409    789    ahrens 	spa = avl_find(&spa_namespace_avl, &search, &where);
    410   4603    ahrens 
    411   4603    ahrens 	if (cp)
    412   4603    ahrens 		*cp = c;
    413    789    ahrens 
    414    789    ahrens 	return (spa);
    415    789    ahrens }
    416    789    ahrens 
    417    789    ahrens /*
    418    789    ahrens  * Create an uninitialized spa_t with the given name.  Requires
    419    789    ahrens  * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
    420    789    ahrens  * exist by calling spa_lookup() first.
    421    789    ahrens  */
    422    789    ahrens spa_t *
    423  10921       Tim spa_add(const char *name, nvlist_t *config, const char *altroot)
    424    789    ahrens {
    425    789    ahrens 	spa_t *spa;
    426   6643  eschrock 	spa_config_dirent_t *dp;
    427    789    ahrens 
    428    789    ahrens 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    429    789    ahrens 
    430    789    ahrens 	spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
    431    789    ahrens 
    432   4831   gw25295 	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
    433  11173  Jonathan 	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
    434  11173  Jonathan 	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
    435  11173  Jonathan 	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
    436  11173  Jonathan 	mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
    437  11173  Jonathan 	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
    438   4831   gw25295 	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
    439  10974      Jeff 	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
    440  10974      Jeff 	mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
    441   4831   gw25295 
    442   4831   gw25295 	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
    443  11173  Jonathan 	cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
    444   4831   gw25295 	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
    445   7754      Jeff 	cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
    446  10922      Jeff 
    447  10922      Jeff 	for (int t = 0; t < TXG_SIZE; t++)
    448  10922      Jeff 		bplist_init(&spa->spa_free_bplist[t]);
    449  10922      Jeff 	bplist_init(&spa->spa_deferred_bplist);
    450   4831   gw25295 
    451   7754      Jeff 	(void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
    452    789    ahrens 	spa->spa_state = POOL_STATE_UNINITIALIZED;
    453    789    ahrens 	spa->spa_freeze_txg = UINT64_MAX;
    454   1635   bonwick 	spa->spa_final_txg = UINT64_MAX;
    455  10921       Tim 	spa->spa_load_max_txg = UINT64_MAX;
    456  11173  Jonathan 	spa->spa_proc = &p0;
    457  11173  Jonathan 	spa->spa_proc_state = SPA_PROC_NONE;
    458    789    ahrens 
    459    789    ahrens 	refcount_create(&spa->spa_refcount);
    460   7754      Jeff 	spa_config_lock_init(spa);
    461    789    ahrens 
    462    789    ahrens 	avl_add(&spa_namespace_avl, spa);
    463   5329   gw25295 
    464   1635   bonwick 	/*
    465   1635   bonwick 	 * Set the alternate root, if there is one.
    466   1635   bonwick 	 */
    467   1635   bonwick 	if (altroot) {
    468   1635   bonwick 		spa->spa_root = spa_strdup(altroot);
    469   1635   bonwick 		spa_active_count++;
    470   1635   bonwick 	}
    471    789    ahrens 
    472   6643  eschrock 	/*
    473   6643  eschrock 	 * Every pool starts with the default cachefile
    474   6643  eschrock 	 */
    475   6643  eschrock 	list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t),
    476   6643  eschrock 	    offsetof(spa_config_dirent_t, scd_link));
    477   6643  eschrock 
    478   6643  eschrock 	dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP);
    479   6643  eschrock 	dp->scd_path = spa_strdup(spa_config_path);
    480   6643  eschrock 	list_insert_head(&spa->spa_config_list, dp);
    481  10921       Tim 
    482  10921       Tim 	if (config != NULL)
    483  10921       Tim 		VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
    484   6643  eschrock 
    485    789    ahrens 	return (spa);
    486    789    ahrens }
    487    789    ahrens 
    488    789    ahrens /*
    489    789    ahrens  * Removes a spa_t from the namespace, freeing up any memory used.  Requires
    490    789    ahrens  * spa_namespace_lock.  This is called only after the spa_t has been closed and
    491    789    ahrens  * deactivated.
    492    789    ahrens  */
    493    789    ahrens void
    494    789    ahrens spa_remove(spa_t *spa)
    495    789    ahrens {
    496   6643  eschrock 	spa_config_dirent_t *dp;
    497   6643  eschrock 
    498    789    ahrens 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    499    789    ahrens 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
    500    789    ahrens 
    501    789    ahrens 	avl_remove(&spa_namespace_avl, spa);
    502    789    ahrens 	cv_broadcast(&spa_namespace_cv);
    503    789    ahrens 
    504   1635   bonwick 	if (spa->spa_root) {
    505    789    ahrens 		spa_strfree(spa->spa_root);
    506   1635   bonwick 		spa_active_count--;
    507   1635   bonwick 	}
    508    789    ahrens 
    509   6643  eschrock 	while ((dp = list_head(&spa->spa_config_list)) != NULL) {
    510   6643  eschrock 		list_remove(&spa->spa_config_list, dp);
    511   6643  eschrock 		if (dp->scd_path != NULL)
    512   6643  eschrock 			spa_strfree(dp->scd_path);
    513   6643  eschrock 		kmem_free(dp, sizeof (spa_config_dirent_t));
    514   6643  eschrock 	}
    515   6643  eschrock 
    516   6643  eschrock 	list_destroy(&spa->spa_config_list);
    517    789    ahrens 
    518    789    ahrens 	spa_config_set(spa, NULL);
    519    789    ahrens 
    520    789    ahrens 	refcount_destroy(&spa->spa_refcount);
    521   4787    ahrens 
    522   7754      Jeff 	spa_config_lock_destroy(spa);
    523   4831   gw25295 
    524  10922      Jeff 	for (int t = 0; t < TXG_SIZE; t++)
    525  10922      Jeff 		bplist_fini(&spa->spa_free_bplist[t]);
    526  10922      Jeff 	bplist_fini(&spa->spa_deferred_bplist);
    527  10922      Jeff 
    528   4831   gw25295 	cv_destroy(&spa->spa_async_cv);
    529  11173  Jonathan 	cv_destroy(&spa->spa_proc_cv);
    530   4831   gw25295 	cv_destroy(&spa->spa_scrub_io_cv);
    531   7754      Jeff 	cv_destroy(&spa->spa_suspend_cv);
    532   4831   gw25295 
    533   4831   gw25295 	mutex_destroy(&spa->spa_async_lock);
    534  11173  Jonathan 	mutex_destroy(&spa->spa_errlist_lock);
    535  11173  Jonathan 	mutex_destroy(&spa->spa_errlog_lock);
    536  11173  Jonathan 	mutex_destroy(&spa->spa_history_lock);
    537  11173  Jonathan 	mutex_destroy(&spa->spa_proc_lock);
    538  11173  Jonathan 	mutex_destroy(&spa->spa_props_lock);
    539   4831   gw25295 	mutex_destroy(&spa->spa_scrub_lock);
    540   7754      Jeff 	mutex_destroy(&spa->spa_suspend_lock);
    541  10974      Jeff 	mutex_destroy(&spa->spa_vdev_top_lock);
    542    789    ahrens 
    543    789    ahrens 	kmem_free(spa, sizeof (spa_t));
    544    789    ahrens }
    545    789    ahrens 
    546    789    ahrens /*
    547    789    ahrens  * Given a pool, return the next pool in the namespace, or NULL if there is
    548    789    ahrens  * none.  If 'prev' is NULL, return the first pool.
    549    789    ahrens  */
    550    789    ahrens spa_t *
    551    789    ahrens spa_next(spa_t *prev)
    552    789    ahrens {
    553    789    ahrens 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    554    789    ahrens 
    555    789    ahrens 	if (prev)
    556    789    ahrens 		return (AVL_NEXT(&spa_namespace_avl, prev));
    557    789    ahrens 	else
    558    789    ahrens 		return (avl_first(&spa_namespace_avl));
    559    789    ahrens }
    560    789    ahrens 
    561    789    ahrens /*
    562    789    ahrens  * ==========================================================================
    563    789    ahrens  * SPA refcount functions
    564    789    ahrens  * ==========================================================================
    565    789    ahrens  */
    566    789    ahrens 
    567    789    ahrens /*
    568    789    ahrens  * Add a reference to the given spa_t.  Must have at least one reference, or
    569    789    ahrens  * have the namespace lock held.
    570    789    ahrens  */
    571    789    ahrens void
    572    789    ahrens spa_open_ref(spa_t *spa, void *tag)
    573    789    ahrens {
    574   7046    ahrens 	ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
    575    789    ahrens 	    MUTEX_HELD(&spa_namespace_lock));
    576    789    ahrens 	(void) refcount_add(&spa->spa_refcount, tag);
    577    789    ahrens }
    578    789    ahrens 
    579    789    ahrens /*
    580    789    ahrens  * Remove a reference to the given spa_t.  Must have at least one reference, or
    581    789    ahrens  * have the namespace lock held.
    582    789    ahrens  */
    583    789    ahrens void
    584    789    ahrens spa_close(spa_t *spa, void *tag)
    585    789    ahrens {
    586   7046    ahrens 	ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
    587    789    ahrens 	    MUTEX_HELD(&spa_namespace_lock));
    588    789    ahrens 	(void) refcount_remove(&spa->spa_refcount, tag);
    589    789    ahrens }
    590    789    ahrens 
    591    789    ahrens /*
    592    789    ahrens  * Check to see if the spa refcount is zero.  Must be called with
    593   7046    ahrens  * spa_namespace_lock held.  We really compare against spa_minref, which is the
    594    789    ahrens  * number of references acquired when opening a pool
    595    789    ahrens  */
    596    789    ahrens boolean_t
    597    789    ahrens spa_refcount_zero(spa_t *spa)
    598    789    ahrens {
    599    789    ahrens 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    600    789    ahrens 
    601   7046    ahrens 	return (refcount_count(&spa->spa_refcount) == spa->spa_minref);
    602   2082  eschrock }
    603   2082  eschrock 
    604   2082  eschrock /*
    605   2082  eschrock  * ==========================================================================
    606   5450   brendan  * SPA spare and l2cache tracking
    607   2082  eschrock  * ==========================================================================
    608   2082  eschrock  */
    609   5450   brendan 
    610   5450   brendan /*
    611   5450   brendan  * Hot spares and cache devices are tracked using the same code below,
    612   5450   brendan  * for 'auxiliary' devices.
    613   5450   brendan  */
    614   5450   brendan 
    615   5450   brendan typedef struct spa_aux {
    616   5450   brendan 	uint64_t	aux_guid;
    617   5450   brendan 	uint64_t	aux_pool;
    618   5450   brendan 	avl_node_t	aux_avl;
    619   5450   brendan 	int		aux_count;
    620   5450   brendan } spa_aux_t;
    621   5450   brendan 
    622   5450   brendan static int
    623   5450   brendan spa_aux_compare(const void *a, const void *b)
    624   5450   brendan {
    625   5450   brendan 	const spa_aux_t *sa = a;
    626   5450   brendan 	const spa_aux_t *sb = b;
    627   5450   brendan 
    628   5450   brendan 	if (sa->aux_guid < sb->aux_guid)
    629   5450   brendan 		return (-1);
    630   5450   brendan 	else if (sa->aux_guid > sb->aux_guid)
    631   5450   brendan 		return (1);
    632   5450   brendan 	else
    633   5450   brendan 		return (0);
    634   5450   brendan }
    635   5450   brendan 
    636   5450   brendan void
    637   5450   brendan spa_aux_add(vdev_t *vd, avl_tree_t *avl)
    638   5450   brendan {
    639   5450   brendan 	avl_index_t where;
    640   5450   brendan 	spa_aux_t search;
    641   5450   brendan 	spa_aux_t *aux;
    642   5450   brendan 
    643   5450   brendan 	search.aux_guid = vd->vdev_guid;
    644   5450   brendan 	if ((aux = avl_find(avl, &search, &where)) != NULL) {
    645   5450   brendan 		aux->aux_count++;
    646   5450   brendan 	} else {
    647   5450   brendan 		aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP);
    648   5450   brendan 		aux->aux_guid = vd->vdev_guid;
    649   5450   brendan 		aux->aux_count = 1;
    650   5450   brendan 		avl_insert(avl, aux, where);
    651   5450   brendan 	}
    652   5450   brendan }
    653   5450   brendan 
    654   5450   brendan void
    655   5450   brendan spa_aux_remove(vdev_t *vd, avl_tree_t *avl)
    656   5450   brendan {
    657   5450   brendan 	spa_aux_t search;
    658   5450   brendan 	spa_aux_t *aux;
    659   5450   brendan 	avl_index_t where;
    660   5450   brendan 
    661   5450   brendan 	search.aux_guid = vd->vdev_guid;
    662   5450   brendan 	aux = avl_find(avl, &search, &where);
    663   5450   brendan 
    664   5450   brendan 	ASSERT(aux != NULL);
    665   5450   brendan 
    666   5450   brendan 	if (--aux->aux_count == 0) {
    667   5450   brendan 		avl_remove(avl, aux);
    668   5450   brendan 		kmem_free(aux, sizeof (spa_aux_t));
    669   5450   brendan 	} else if (aux->aux_pool == spa_guid(vd->vdev_spa)) {
    670   5450   brendan 		aux->aux_pool = 0ULL;
    671   5450   brendan 	}
    672   5450   brendan }
    673   5450   brendan 
    674   5450   brendan boolean_t
    675   7214     lling spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl)
    676   5450   brendan {
    677   5450   brendan 	spa_aux_t search, *found;
    678   5450   brendan 
    679   5450   brendan 	search.aux_guid = guid;
    680   7214     lling 	found = avl_find(avl, &search, NULL);
    681   5450   brendan 
    682   5450   brendan 	if (pool) {
    683   5450   brendan 		if (found)
    684   5450   brendan 			*pool = found->aux_pool;
    685   5450   brendan 		else
    686   5450   brendan 			*pool = 0ULL;
    687   7214     lling 	}
    688   7214     lling 
    689   7214     lling 	if (refcnt) {
    690   7214     lling 		if (found)
    691   7214     lling 			*refcnt = found->aux_count;
    692   7214     lling 		else
    693   7214     lling 			*refcnt = 0;
    694   5450   brendan 	}
    695   5450   brendan 
    696   5450   brendan 	return (found != NULL);
    697   5450   brendan }
    698   5450   brendan 
    699   5450   brendan void
    700   5450   brendan spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
    701   5450   brendan {
    702   5450   brendan 	spa_aux_t search, *found;
    703   5450   brendan 	avl_index_t where;
    704   5450   brendan 
    705   5450   brendan 	search.aux_guid = vd->vdev_guid;
    706   5450   brendan 	found = avl_find(avl, &search, &where);
    707   5450   brendan 	ASSERT(found != NULL);
    708   5450   brendan 	ASSERT(found->aux_pool == 0ULL);
    709   5450   brendan 
    710   5450   brendan 	found->aux_pool = spa_guid(vd->vdev_spa);
    711   5450   brendan }
    712   2082  eschrock 
    713   2082  eschrock /*
    714   3377  eschrock  * Spares are tracked globally due to the following constraints:
    715   3377  eschrock  *
    716   3377  eschrock  * 	- A spare may be part of multiple pools.
    717   3377  eschrock  * 	- A spare may be added to a pool even if it's actively in use within
    718   3377  eschrock  *	  another pool.
    719   3377  eschrock  * 	- A spare in use in any pool can only be the source of a replacement if
    720   3377  eschrock  *	  the target is a spare in the same pool.
    721   3377  eschrock  *
    722   3377  eschrock  * We keep track of all spares on the system through the use of a reference
    723   3377  eschrock  * counted AVL tree.  When a vdev is added as a spare, or used as a replacement
    724   3377  eschrock  * spare, then we bump the reference count in the AVL tree.  In addition, we set
    725   3377  eschrock  * the 'vdev_isspare' member to indicate that the device is a spare (active or
    726   3377  eschrock  * inactive).  When a spare is made active (used to replace a device in the
    727   3377  eschrock  * pool), we also keep track of which pool it's been made a part of.
    728   3377  eschrock  *
    729   3377  eschrock  * The 'spa_spare_lock' protects the AVL tree.  These functions are normally
    730   3377  eschrock  * called under the spa_namespace lock as part of vdev reconfiguration.  The
    731   3377  eschrock  * separate spare lock exists for the status query path, which does not need to
    732   3377  eschrock  * be completely consistent with respect to other vdev configuration changes.
    733   2082  eschrock  */
    734   3377  eschrock 
    735   2082  eschrock static int
    736   2082  eschrock spa_spare_compare(const void *a, const void *b)
    737   2082  eschrock {
    738   5450   brendan 	return (spa_aux_compare(a, b));
    739   2082  eschrock }
    740   2082  eschrock 
    741   2082  eschrock void
    742   3377  eschrock spa_spare_add(vdev_t *vd)
    743   2082  eschrock {
    744   2082  eschrock 	mutex_enter(&spa_spare_lock);
    745   3377  eschrock 	ASSERT(!vd->vdev_isspare);
    746   5450   brendan 	spa_aux_add(vd, &spa_spare_avl);
    747   3377  eschrock 	vd->vdev_isspare = B_TRUE;
    748   2082  eschrock 	mutex_exit(&spa_spare_lock);
    749   2082  eschrock }
    750   2082  eschrock 
    751   2082  eschrock void
    752   3377  eschrock spa_spare_remove(vdev_t *vd)
    753   2082  eschrock {
    754   2082  eschrock 	mutex_enter(&spa_spare_lock);
    755   3377  eschrock 	ASSERT(vd->vdev_isspare);
    756   5450   brendan 	spa_aux_remove(vd, &spa_spare_avl);
    757   3377  eschrock 	vd->vdev_isspare = B_FALSE;
    758   2082  eschrock 	mutex_exit(&spa_spare_lock);
    759   2082  eschrock }
    760   2082  eschrock 
    761   2082  eschrock boolean_t
    762   7214     lling spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt)
    763   2082  eschrock {
    764   5450   brendan 	boolean_t found;
    765   2082  eschrock 
    766   2082  eschrock 	mutex_enter(&spa_spare_lock);
    767   7214     lling 	found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl);
    768   2082  eschrock 	mutex_exit(&spa_spare_lock);
    769   2082  eschrock 
    770   5450   brendan 	return (found);
    771   3377  eschrock }
    772   3377  eschrock 
    773   3377  eschrock void
    774   3377  eschrock spa_spare_activate(vdev_t *vd)
    775   3377  eschrock {
    776   3377  eschrock 	mutex_enter(&spa_spare_lock);
    777   3377  eschrock 	ASSERT(vd->vdev_isspare);
    778   5450   brendan 	spa_aux_activate(vd, &spa_spare_avl);
    779   5450   brendan 	mutex_exit(&spa_spare_lock);
    780   5450   brendan }
    781   3377  eschrock 
    782   5450   brendan /*
    783   5450   brendan  * Level 2 ARC devices are tracked globally for the same reasons as spares.
    784   5450   brendan  * Cache devices currently only support one pool per cache device, and so
    785   5450   brendan  * for these devices the aux reference count is currently unused beyond 1.
    786   5450   brendan  */
    787   3377  eschrock 
    788   5450   brendan static int
    789   5450   brendan spa_l2cache_compare(const void *a, const void *b)
    790   5450   brendan {
    791   5450   brendan 	return (spa_aux_compare(a, b));
    792   5450   brendan }
    793   5450   brendan 
    794   5450   brendan void
    795   5450   brendan spa_l2cache_add(vdev_t *vd)
    796   5450   brendan {
    797   5450   brendan 	mutex_enter(&spa_l2cache_lock);
    798   5450   brendan 	ASSERT(!vd->vdev_isl2cache);
    799   5450   brendan 	spa_aux_add(vd, &spa_l2cache_avl);
    800   5450   brendan 	vd->vdev_isl2cache = B_TRUE;
    801   5450   brendan 	mutex_exit(&spa_l2cache_lock);
    802   5450   brendan }
    803   5450   brendan 
    804   5450   brendan void
    805   5450   brendan spa_l2cache_remove(vdev_t *vd)
    806   5450   brendan {
    807   5450   brendan 	mutex_enter(&spa_l2cache_lock);
    808   5450   brendan 	ASSERT(vd->vdev_isl2cache);
    809   5450   brendan 	spa_aux_remove(vd, &spa_l2cache_avl);
    810   5450   brendan 	vd->vdev_isl2cache = B_FALSE;
    811   5450   brendan 	mutex_exit(&spa_l2cache_lock);
    812   5450   brendan }
    813   5450   brendan 
    814   5450   brendan boolean_t
    815   5450   brendan spa_l2cache_exists(uint64_t guid, uint64_t *pool)
    816   5450   brendan {
    817   5450   brendan 	boolean_t found;
    818   5450   brendan 
    819   5450   brendan 	mutex_enter(&spa_l2cache_lock);
    820   7214     lling 	found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl);
    821   5450   brendan 	mutex_exit(&spa_l2cache_lock);
    822   5450   brendan 
    823   5450   brendan 	return (found);
    824   5450   brendan }
    825   5450   brendan 
    826   5450   brendan void
    827   5450   brendan spa_l2cache_activate(vdev_t *vd)
    828   5450   brendan {
    829   5450   brendan 	mutex_enter(&spa_l2cache_lock);
    830   5450   brendan 	ASSERT(vd->vdev_isl2cache);
    831   5450   brendan 	spa_aux_activate(vd, &spa_l2cache_avl);
    832   5450   brendan 	mutex_exit(&spa_l2cache_lock);
    833   5450   brendan }
    834   5450   brendan 
    835    789    ahrens /*
    836    789    ahrens  * ==========================================================================
    837    789    ahrens  * SPA vdev locking
    838    789    ahrens  * ==========================================================================
    839    789    ahrens  */
    840    789    ahrens 
    841    789    ahrens /*
    842   1544  eschrock  * Lock the given spa_t for the purpose of adding or removing a vdev.
    843   1544  eschrock  * Grabs the global spa_namespace_lock plus the spa config lock for writing.
    844    789    ahrens  * It returns the next transaction group for the spa_t.
    845    789    ahrens  */
    846    789    ahrens uint64_t
    847    789    ahrens spa_vdev_enter(spa_t *spa)
    848    789    ahrens {
    849  11125      Jeff 	mutex_enter(&spa->spa_vdev_top_lock);
    850   4451  eschrock 	mutex_enter(&spa_namespace_lock);
    851  10594    George 	return (spa_vdev_config_enter(spa));
    852  10594    George }
    853  10594    George 
    854  10594    George /*
    855  10594    George  * Internal implementation for spa_vdev_enter().  Used when a vdev
    856  10594    George  * operation requires multiple syncs (i.e. removing a device) while
    857  10594    George  * keeping the spa_namespace_lock held.
    858  10594    George  */
    859  10594    George uint64_t
    860  10594    George spa_vdev_config_enter(spa_t *spa)
    861  10594    George {
    862  10594    George 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    863   4451  eschrock 
    864   7754      Jeff 	spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
    865    789    ahrens 
    866    789    ahrens 	return (spa_last_synced_txg(spa) + 1);
    867    789    ahrens }
    868    789    ahrens 
    869    789    ahrens /*
    870  10594    George  * Used in combination with spa_vdev_config_enter() to allow the syncing
    871  10594    George  * of multiple transactions without releasing the spa_namespace_lock.
    872    789    ahrens  */
    873  10594    George void
    874  10594    George spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
    875    789    ahrens {
    876  10594    George 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    877  10594    George 
    878   1585   bonwick 	int config_changed = B_FALSE;
    879    789    ahrens 
    880   1635   bonwick 	ASSERT(txg > spa_last_synced_txg(spa));
    881   1585   bonwick 
    882   7754      Jeff 	spa->spa_pending_vdev = NULL;
    883   7754      Jeff 
    884   1585   bonwick 	/*
    885   1585   bonwick 	 * Reassess the DTLs.
    886   1585   bonwick 	 */
    887   1635   bonwick 	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
    888   1585   bonwick 
    889   1585   bonwick 	/*
    890   1635   bonwick 	 * If the config changed, notify the scrub thread that it must restart.
    891   1585   bonwick 	 */
    892   7754      Jeff 	if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
    893   7046    ahrens 		dsl_pool_scrub_restart(spa->spa_dsl_pool);
    894   1585   bonwick 		config_changed = B_TRUE;
    895  10685    George 		spa->spa_config_generation++;
    896   1585   bonwick 	}
    897    789    ahrens 
    898  10594    George 	/*
    899  10594    George 	 * Verify the metaslab classes.
    900  10594    George 	 */
    901  10922      Jeff 	ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
    902  10922      Jeff 	ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
    903  10594    George 
    904   7754      Jeff 	spa_config_exit(spa, SCL_ALL, spa);
    905  10594    George 
    906  10594    George 	/*
    907  10594    George 	 * Panic the system if the specified tag requires it.  This
    908  10594    George 	 * is useful for ensuring that configurations are updated
    909  10594    George 	 * transactionally.
    910  10594    George 	 */
    911  10594    George 	if (zio_injection_enabled)
    912  10594    George 		zio_handle_panic_injection(spa, tag);
    913    789    ahrens 
    914    789    ahrens 	/*
    915    789    ahrens 	 * Note: this txg_wait_synced() is important because it ensures
    916    789    ahrens 	 * that there won't be more than one config change per txg.
    917    789    ahrens 	 * This allows us to use the txg as the generation number.
    918    789    ahrens 	 */
    919    789    ahrens 	if (error == 0)
    920    789    ahrens 		txg_wait_synced(spa->spa_dsl_pool, txg);
    921    789    ahrens 
    922    789    ahrens 	if (vd != NULL) {
    923   8241      Jeff 		ASSERT(!vd->vdev_detached || vd->vdev_dtl_smo.smo_object == 0);
    924   8241      Jeff 		spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
    925    789    ahrens 		vdev_free(vd);
    926   8241      Jeff 		spa_config_exit(spa, SCL_ALL, spa);
    927    789    ahrens 	}
    928    789    ahrens 
    929    789    ahrens 	/*
    930   1585   bonwick 	 * If the config changed, update the config cache.
    931    789    ahrens 	 */
    932   1585   bonwick 	if (config_changed)
    933   6643  eschrock 		spa_config_sync(spa, B_FALSE, B_TRUE);
    934  10594    George }
    935   1544  eschrock 
    936  10594    George /*
    937  10594    George  * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
    938  10594    George  * locking of spa_vdev_enter(), we also want make sure the transactions have
    939  10594    George  * synced to disk, and then update the global configuration cache with the new
    940  10594    George  * information.
    941  10594    George  */
    942  10594    George int
    943  10594    George spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
    944  10594    George {
    945  10594    George 	spa_vdev_config_exit(spa, vd, txg, error, FTAG);
    946  11125      Jeff 	mutex_exit(&spa_namespace_lock);
    947  10974      Jeff 	mutex_exit(&spa->spa_vdev_top_lock);
    948   7754      Jeff 
    949   7754      Jeff 	return (error);
    950   7754      Jeff }
    951   7754      Jeff 
    952   7754      Jeff /*
    953   7754      Jeff  * Lock the given spa_t for the purpose of changing vdev state.
    954   7754      Jeff  */
    955   7754      Jeff void
    956  10685    George spa_vdev_state_enter(spa_t *spa, int oplocks)
    957   7754      Jeff {
    958  10685    George 	int locks = SCL_STATE_ALL | oplocks;
    959  10685    George 
    960  10685    George 	spa_config_enter(spa, locks, spa, RW_WRITER);
    961  10685    George 	spa->spa_vdev_locks = locks;
    962   7754      Jeff }
    963   7754      Jeff 
    964   7754      Jeff int
    965   7754      Jeff spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
    966   7754      Jeff {
    967  10922      Jeff 	if (vd != NULL || error == 0)
    968  10922      Jeff 		vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
    969  10922      Jeff 		    0, 0, B_FALSE);
    970  10922      Jeff 
    971  10685    George 	if (vd != NULL) {
    972   7754      Jeff 		vdev_state_dirty(vd->vdev_top);
    973  10685    George 		spa->spa_config_generation++;
    974  10685    George 	}
    975   7754      Jeff 
    976  10685    George 	ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL);
    977  10685    George 	spa_config_exit(spa, spa->spa_vdev_locks, spa);
    978   8241      Jeff 
    979   8241      Jeff 	/*
    980   8241      Jeff 	 * If anything changed, wait for it to sync.  This ensures that,
    981   8241      Jeff 	 * from the system administrator's perspective, zpool(1M) commands
    982   8241      Jeff 	 * are synchronous.  This is important for things like zpool offline:
    983   8241      Jeff 	 * when the command completes, you expect no further I/O from ZFS.
    984   8241      Jeff 	 */
    985   8241      Jeff 	if (vd != NULL)
    986   8241      Jeff 		txg_wait_synced(spa->spa_dsl_pool, 0);
    987    789    ahrens 
    988    789    ahrens 	return (error);
    989    789    ahrens }
    990    789    ahrens 
    991    789    ahrens /*
    992    789    ahrens  * ==========================================================================
    993    789    ahrens  * Miscellaneous functions
    994    789    ahrens  * ==========================================================================
    995    789    ahrens  */
    996    789    ahrens 
    997    789    ahrens /*
    998    789    ahrens  * Rename a spa_t.
    999    789    ahrens  */
   1000    789    ahrens int
   1001    789    ahrens spa_rename(const char *name, const char *newname)
   1002    789    ahrens {
   1003    789    ahrens 	spa_t *spa;
   1004    789    ahrens 	int err;
   1005    789    ahrens 
   1006    789    ahrens 	/*
   1007    789    ahrens 	 * Lookup the spa_t and grab the config lock for writing.  We need to
   1008    789    ahrens 	 * actually open the pool so that we can sync out the necessary labels.
   1009    789    ahrens 	 * It's OK to call spa_open() with the namespace lock held because we
   1010   1544  eschrock 	 * allow recursive calls for other reasons.
   1011    789    ahrens 	 */
   1012    789    ahrens 	mutex_enter(&spa_namespace_lock);
   1013    789    ahrens 	if ((err = spa_open(name, &spa, FTAG)) != 0) {
   1014    789    ahrens 		mutex_exit(&spa_namespace_lock);
   1015    789    ahrens 		return (err);
   1016    789    ahrens 	}
   1017    789    ahrens 
   1018   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1019    789    ahrens 
   1020    789    ahrens 	avl_remove(&spa_namespace_avl, spa);
   1021   7754      Jeff 	(void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name));
   1022    789    ahrens 	avl_add(&spa_namespace_avl, spa);
   1023    789    ahrens 
   1024    789    ahrens 	/*
   1025    789    ahrens 	 * Sync all labels to disk with the new names by marking the root vdev
   1026    789    ahrens 	 * dirty and waiting for it to sync.  It will pick up the new pool name
   1027    789    ahrens 	 * during the sync.
   1028    789    ahrens 	 */
   1029    789    ahrens 	vdev_config_dirty(spa->spa_root_vdev);
   1030    789    ahrens 
   1031   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   1032    789    ahrens 
   1033   1635   bonwick 	txg_wait_synced(spa->spa_dsl_pool, 0);
   1034    789    ahrens 
   1035    789    ahrens 	/*
   1036    789    ahrens 	 * Sync the updated config cache.
   1037    789    ahrens 	 */
   1038   6643  eschrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
   1039    789    ahrens 
   1040    789    ahrens 	spa_close(spa, FTAG);
   1041    789    ahrens 
   1042    789    ahrens 	mutex_exit(&spa_namespace_lock);
   1043    789    ahrens 
   1044    789    ahrens 	return (0);
   1045    789    ahrens }
   1046    789    ahrens 
   1047    789    ahrens 
   1048    789    ahrens /*
   1049    789    ahrens  * Determine whether a pool with given pool_guid exists.  If device_guid is
   1050    789    ahrens  * non-zero, determine whether the pool exists *and* contains a device with the
   1051    789    ahrens  * specified device_guid.
   1052    789    ahrens  */
   1053    789    ahrens boolean_t
   1054    789    ahrens spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
   1055    789    ahrens {
   1056    789    ahrens 	spa_t *spa;
   1057    789    ahrens 	avl_tree_t *t = &spa_namespace_avl;
   1058    789    ahrens 
   1059   1544  eschrock 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
   1060    789    ahrens 
   1061    789    ahrens 	for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
   1062    789    ahrens 		if (spa->spa_state == POOL_STATE_UNINITIALIZED)
   1063    789    ahrens 			continue;
   1064    789    ahrens 		if (spa->spa_root_vdev == NULL)
   1065    789    ahrens 			continue;
   1066   3377  eschrock 		if (spa_guid(spa) == pool_guid) {
   1067   3377  eschrock 			if (device_guid == 0)
   1068   3377  eschrock 				break;
   1069   3377  eschrock 
   1070   3377  eschrock 			if (vdev_lookup_by_guid(spa->spa_root_vdev,
   1071   3377  eschrock 			    device_guid) != NULL)
   1072   3377  eschrock 				break;
   1073   3377  eschrock 
   1074   3377  eschrock 			/*
   1075   4527    perrin 			 * Check any devices we may be in the process of adding.
   1076   3377  eschrock 			 */
   1077   3377  eschrock 			if (spa->spa_pending_vdev) {
   1078   3377  eschrock 				if (vdev_lookup_by_guid(spa->spa_pending_vdev,
   1079   3377  eschrock 				    device_guid) != NULL)
   1080   3377  eschrock 					break;
   1081   3377  eschrock 			}
   1082   3377  eschrock 		}
   1083    789    ahrens 	}
   1084    789    ahrens 
   1085    789    ahrens 	return (spa != NULL);
   1086    789    ahrens }
   1087    789    ahrens 
   1088    789    ahrens char *
   1089    789    ahrens spa_strdup(const char *s)
   1090    789    ahrens {
   1091    789    ahrens 	size_t len;
   1092    789    ahrens 	char *new;
   1093    789    ahrens 
   1094    789    ahrens 	len = strlen(s);
   1095    789    ahrens 	new = kmem_alloc(len + 1, KM_SLEEP);
   1096    789    ahrens 	bcopy(s, new, len);
   1097    789    ahrens 	new[len] = '\0';
   1098    789    ahrens 
   1099    789    ahrens 	return (new);
   1100    789    ahrens }
   1101    789    ahrens 
   1102    789    ahrens void
   1103    789    ahrens spa_strfree(char *s)
   1104    789    ahrens {
   1105    789    ahrens 	kmem_free(s, strlen(s) + 1);
   1106    789    ahrens }
   1107    789    ahrens 
   1108    789    ahrens uint64_t
   1109    789    ahrens spa_get_random(uint64_t range)
   1110    789    ahrens {
   1111    789    ahrens 	uint64_t r;
   1112    789    ahrens 
   1113    789    ahrens 	ASSERT(range != 0);
   1114    789    ahrens 
   1115    789    ahrens 	(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
   1116    789    ahrens 
   1117    789    ahrens 	return (r % range);
   1118    789    ahrens }
   1119    789    ahrens 
   1120    789    ahrens void
   1121  10922      Jeff sprintf_blkptr(char *buf, const blkptr_t *bp)
   1122    789    ahrens {
   1123  10922      Jeff 	char *type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
   1124  10922      Jeff 	char *checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
   1125  10922      Jeff 	char *compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
   1126    789    ahrens 
   1127  10922      Jeff 	SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
   1128    789    ahrens }
   1129    789    ahrens 
   1130    789    ahrens void
   1131    789    ahrens spa_freeze(spa_t *spa)
   1132    789    ahrens {
   1133    789    ahrens 	uint64_t freeze_txg = 0;
   1134    789    ahrens 
   1135   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1136    789    ahrens 	if (spa->spa_freeze_txg == UINT64_MAX) {
   1137    789    ahrens 		freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE;
   1138    789    ahrens 		spa->spa_freeze_txg = freeze_txg;
   1139    789    ahrens 	}
   1140   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   1141    789    ahrens 	if (freeze_txg != 0)
   1142    789    ahrens 		txg_wait_synced(spa_get_dsl(spa), freeze_txg);
   1143    789    ahrens }
   1144    789    ahrens 
   1145   3713    ahrens void
   1146   3713    ahrens zfs_panic_recover(const char *fmt, ...)
   1147   3713    ahrens {
   1148   3713    ahrens 	va_list adx;
   1149   3713    ahrens 
   1150   3713    ahrens 	va_start(adx, fmt);
   1151   3713    ahrens 	vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
   1152   3713    ahrens 	va_end(adx);
   1153   3713    ahrens }
   1154   3713    ahrens 
   1155    789    ahrens /*
   1156    789    ahrens  * ==========================================================================
   1157    789    ahrens  * Accessor functions
   1158    789    ahrens  * ==========================================================================
   1159    789    ahrens  */
   1160    789    ahrens 
   1161   7837   Matthew boolean_t
   1162   7837   Matthew spa_shutting_down(spa_t *spa)
   1163    789    ahrens {
   1164   7837   Matthew 	return (spa->spa_async_suspended);
   1165    789    ahrens }
   1166    789    ahrens 
   1167    789    ahrens dsl_pool_t *
   1168    789    ahrens spa_get_dsl(spa_t *spa)
   1169    789    ahrens {
   1170    789    ahrens 	return (spa->spa_dsl_pool);
   1171    789    ahrens }
   1172    789    ahrens 
   1173    789    ahrens blkptr_t *
   1174    789    ahrens spa_get_rootblkptr(spa_t *spa)
   1175    789    ahrens {
   1176    789    ahrens 	return (&spa->spa_ubsync.ub_rootbp);
   1177    789    ahrens }
   1178    789    ahrens 
   1179    789    ahrens void
   1180    789    ahrens spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
   1181    789    ahrens {
   1182    789    ahrens 	spa->spa_uberblock.ub_rootbp = *bp;
   1183    789    ahrens }
   1184    789    ahrens 
   1185    789    ahrens void
   1186    789    ahrens spa_altroot(spa_t *spa, char *buf, size_t buflen)
   1187    789    ahrens {
   1188    789    ahrens 	if (spa->spa_root == NULL)
   1189    789    ahrens 		buf[0] = '\0';
   1190    789    ahrens 	else
   1191    789    ahrens 		(void) strncpy(buf, spa->spa_root, buflen);
   1192    789    ahrens }
   1193    789    ahrens 
   1194    789    ahrens int
   1195    789    ahrens spa_sync_pass(spa_t *spa)
   1196    789    ahrens {
   1197    789    ahrens 	return (spa->spa_sync_pass);
   1198    789    ahrens }
   1199    789    ahrens 
   1200    789    ahrens char *
   1201    789    ahrens spa_name(spa_t *spa)
   1202    789    ahrens {
   1203    789    ahrens 	return (spa->spa_name);
   1204    789    ahrens }
   1205    789    ahrens 
   1206    789    ahrens uint64_t
   1207    789    ahrens spa_guid(spa_t *spa)
   1208    789    ahrens {
   1209   2174  eschrock 	/*
   1210   2174  eschrock 	 * If we fail to parse the config during spa_load(), we can go through
   1211   2174  eschrock 	 * the error path (which posts an ereport) and end up here with no root
   1212   2174  eschrock 	 * vdev.  We stash the original pool guid in 'spa_load_guid' to handle
   1213   2174  eschrock 	 * this case.
   1214   2174  eschrock 	 */
   1215   2174  eschrock 	if (spa->spa_root_vdev != NULL)
   1216   2174  eschrock 		return (spa->spa_root_vdev->vdev_guid);
   1217   2174  eschrock 	else
   1218   2174  eschrock 		return (spa->spa_load_guid);
   1219    789    ahrens }
   1220    789    ahrens 
   1221    789    ahrens uint64_t
   1222    789    ahrens spa_last_synced_txg(spa_t *spa)
   1223    789    ahrens {
   1224    789    ahrens 	return (spa->spa_ubsync.ub_txg);
   1225    789    ahrens }
   1226    789    ahrens 
   1227    789    ahrens uint64_t
   1228    789    ahrens spa_first_txg(spa_t *spa)
   1229    789    ahrens {
   1230    789    ahrens 	return (spa->spa_first_txg);
   1231    789    ahrens }
   1232    789    ahrens 
   1233  10922      Jeff uint64_t
   1234  10922      Jeff spa_syncing_txg(spa_t *spa)
   1235  10922      Jeff {
   1236  10922      Jeff 	return (spa->spa_syncing_txg);
   1237  10922      Jeff }
   1238  10922      Jeff 
   1239   7837   Matthew pool_state_t
   1240    789    ahrens spa_state(spa_t *spa)
   1241    789    ahrens {
   1242    789    ahrens 	return (spa->spa_state);
   1243    789    ahrens }
   1244    789    ahrens 
   1245  11147    George spa_load_state_t
   1246  11147    George spa_load_state(spa_t *spa)
   1247  11147    George {
   1248  11147    George 	return (spa->spa_load_state);
   1249  11147    George }
   1250  11147    George 
   1251    789    ahrens uint64_t
   1252    789    ahrens spa_freeze_txg(spa_t *spa)
   1253    789    ahrens {
   1254    789    ahrens 	return (spa->spa_freeze_txg);
   1255    789    ahrens }
   1256    789    ahrens 
   1257    789    ahrens /* ARGSUSED */
   1258    789    ahrens uint64_t
   1259    789    ahrens spa_get_asize(spa_t *spa, uint64_t lsize)
   1260    789    ahrens {
   1261    789    ahrens 	/*
   1262  10922      Jeff 	 * The worst case is single-sector max-parity RAID-Z blocks, in which
   1263  10922      Jeff 	 * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
   1264  10922      Jeff 	 * times the size; so just assume that.  Add to this the fact that
   1265  10922      Jeff 	 * we can have up to 3 DVAs per bp, and one more factor of 2 because
   1266  10922      Jeff 	 * the block may be dittoed with up to 3 DVAs by ddt_sync().
   1267    789    ahrens 	 */
   1268  10922      Jeff 	return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
   1269   1775     billm }
   1270   1775     billm 
   1271  10956    George uint64_t
   1272  10956    George spa_get_dspace(spa_t *spa)
   1273  10956    George {
   1274  10956    George 	return (spa->spa_dspace);
   1275  10956    George }
   1276  10956    George 
   1277  10956    George void
   1278  10956    George spa_update_dspace(spa_t *spa)
   1279  10956    George {
   1280  10956    George 	spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
   1281  10956    George 	    ddt_get_dedup_dspace(spa);
   1282  10956    George }
   1283  10956    George 
   1284   5329   gw25295 /*
   1285   5329   gw25295  * Return the failure mode that has been set to this pool. The default
   1286   5329   gw25295  * behavior will be to block all I/Os when a complete failure occurs.
   1287   5329   gw25295  */
   1288   5329   gw25295 uint8_t
   1289   5329   gw25295 spa_get_failmode(spa_t *spa)
   1290   5329   gw25295 {
   1291   5329   gw25295 	return (spa->spa_failmode);
   1292   5329   gw25295 }
   1293   5329   gw25295 
   1294   7754      Jeff boolean_t
   1295   7754      Jeff spa_suspended(spa_t *spa)
   1296   7754      Jeff {
   1297   7754      Jeff 	return (spa->spa_suspended);
   1298   7754      Jeff }
   1299   7754      Jeff 
   1300   1775     billm uint64_t
   1301   1775     billm spa_version(spa_t *spa)
   1302   1775     billm {
   1303   1775     billm 	return (spa->spa_ubsync.ub_version);
   1304   1775     billm }
   1305   1775     billm 
   1306  10922      Jeff boolean_t
   1307  10922      Jeff spa_deflate(spa_t *spa)
   1308  10922      Jeff {
   1309  10922      Jeff 	return (spa->spa_deflate);
   1310  10922      Jeff }
   1311  10922      Jeff 
   1312  10922      Jeff metaslab_class_t *
   1313  10922      Jeff spa_normal_class(spa_t *spa)
   1314  10922      Jeff {
   1315  10922      Jeff 	return (spa->spa_normal_class);
   1316  10922      Jeff }
   1317  10922      Jeff 
   1318  10922      Jeff metaslab_class_t *
   1319  10922      Jeff spa_log_class(spa_t *spa)
   1320  10922      Jeff {
   1321  10922      Jeff 	return (spa->spa_log_class);
   1322  10922      Jeff }
   1323  10922      Jeff 
   1324   1775     billm int
   1325   1775     billm spa_max_replication(spa_t *spa)
   1326   1775     billm {
   1327   1775     billm 	/*
   1328   4577    ahrens 	 * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to
   1329   1775     billm 	 * handle BPs with more than one DVA allocated.  Set our max
   1330   1775     billm 	 * replication level accordingly.
   1331   1775     billm 	 */
   1332   4577    ahrens 	if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
   1333   1775     billm 		return (1);
   1334   1775     billm 	return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
   1335   2082  eschrock }
   1336   2082  eschrock 
   1337   2082  eschrock uint64_t
   1338  10922      Jeff dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
   1339   2082  eschrock {
   1340  10922      Jeff 	uint64_t asize = DVA_GET_ASIZE(dva);
   1341  10922      Jeff 	uint64_t dsize = asize;
   1342   2082  eschrock 
   1343  10922      Jeff 	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
   1344  10922      Jeff 
   1345  10922      Jeff 	if (asize != 0 && spa->spa_deflate) {
   1346  10922      Jeff 		vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
   1347  10922      Jeff 		dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
   1348  10922      Jeff 	}
   1349  10922      Jeff 
   1350  10922      Jeff 	return (dsize);
   1351  10922      Jeff }
   1352  10922      Jeff 
   1353  10922      Jeff uint64_t
   1354  10922      Jeff bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp)
   1355  10922      Jeff {
   1356  10922      Jeff 	uint64_t dsize = 0;
   1357  10922      Jeff 
   1358  10922      Jeff 	for (int d = 0; d < SPA_DVAS_PER_BP; d++)
   1359  10922      Jeff 		dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
   1360  10922      Jeff 
   1361  10922      Jeff 	return (dsize);
   1362  10922      Jeff }
   1363  10922      Jeff 
   1364  10922      Jeff uint64_t
   1365  10922      Jeff bp_get_dsize(spa_t *spa, const blkptr_t *bp)
   1366  10922      Jeff {
   1367  10922      Jeff 	uint64_t dsize = 0;
   1368   2082  eschrock 
   1369   7754      Jeff 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
   1370  10922      Jeff 
   1371  10922      Jeff 	for (int d = 0; d < SPA_DVAS_PER_BP; d++)
   1372  10922      Jeff 		dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
   1373  10922      Jeff 
   1374   7754      Jeff 	spa_config_exit(spa, SCL_VDEV, FTAG);
   1375  10922      Jeff 
   1376  10922      Jeff 	return (dsize);
   1377    789    ahrens }
   1378    789    ahrens 
   1379    789    ahrens /*
   1380    789    ahrens  * ==========================================================================
   1381    789    ahrens  * Initialization and Termination
   1382    789    ahrens  * ==========================================================================
   1383    789    ahrens  */
   1384    789    ahrens 
   1385    789    ahrens static int
   1386    789    ahrens spa_name_compare(const void *a1, const void *a2)
   1387    789    ahrens {
   1388    789    ahrens 	const spa_t *s1 = a1;
   1389    789    ahrens 	const spa_t *s2 = a2;
   1390    789    ahrens 	int s;
   1391    789    ahrens 
   1392    789    ahrens 	s = strcmp(s1->spa_name, s2->spa_name);
   1393    789    ahrens 	if (s > 0)
   1394    789    ahrens 		return (1);
   1395    789    ahrens 	if (s < 0)
   1396    789    ahrens 		return (-1);
   1397    789    ahrens 	return (0);
   1398    789    ahrens }
   1399    789    ahrens 
   1400   1635   bonwick int
   1401   1635   bonwick spa_busy(void)
   1402   1635   bonwick {
   1403   1635   bonwick 	return (spa_active_count);
   1404   1635   bonwick }
   1405   1635   bonwick 
   1406    789    ahrens void
   1407   6423   gw25295 spa_boot_init()
   1408   6423   gw25295 {
   1409   6423   gw25295 	spa_config_load();
   1410   6423   gw25295 }
   1411   6423   gw25295 
   1412   6423   gw25295 void
   1413    789    ahrens spa_init(int mode)
   1414    789    ahrens {
   1415    789    ahrens 	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
   1416   4831   gw25295 	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
   1417   5450   brendan 	mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL);
   1418    789    ahrens 	cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
   1419    789    ahrens 
   1420    789    ahrens 	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
   1421    789    ahrens 	    offsetof(spa_t, spa_avl));
   1422    789    ahrens 
   1423   5450   brendan 	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t),
   1424   5450   brendan 	    offsetof(spa_aux_t, aux_avl));
   1425   5450   brendan 
   1426   5450   brendan 	avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
   1427   5450   brendan 	    offsetof(spa_aux_t, aux_avl));
   1428   2082  eschrock 
   1429   8241      Jeff 	spa_mode_global = mode;
   1430    789    ahrens 
   1431    789    ahrens 	refcount_init();
   1432    789    ahrens 	unique_init();
   1433    789    ahrens 	zio_init();
   1434    789    ahrens 	dmu_init();
   1435    789    ahrens 	zil_init();
   1436   5810  ek110237 	vdev_cache_stat_init();
   1437   4787    ahrens 	zfs_prop_init();
   1438   5094     lling 	zpool_prop_init();
   1439    789    ahrens 	spa_config_load();
   1440   7754      Jeff 	l2arc_start();
   1441    789    ahrens }
   1442    789    ahrens 
   1443    789    ahrens void
   1444    789    ahrens spa_fini(void)
   1445    789    ahrens {
   1446   7754      Jeff 	l2arc_stop();
   1447   7754      Jeff 
   1448    789    ahrens 	spa_evict_all();
   1449    789    ahrens 
   1450   5810  ek110237 	vdev_cache_stat_fini();
   1451    789    ahrens 	zil_fini();
   1452    789    ahrens 	dmu_fini();
   1453    789    ahrens 	zio_fini();
   1454   4787    ahrens 	unique_fini();
   1455    789    ahrens 	refcount_fini();
   1456    789    ahrens 
   1457    789    ahrens 	avl_destroy(&spa_namespace_avl);
   1458   2082  eschrock 	avl_destroy(&spa_spare_avl);
   1459   5450   brendan 	avl_destroy(&spa_l2cache_avl);
   1460    789    ahrens 
   1461    789    ahrens 	cv_destroy(&spa_namespace_cv);
   1462    789    ahrens 	mutex_destroy(&spa_namespace_lock);
   1463   4831   gw25295 	mutex_destroy(&spa_spare_lock);
   1464   5450   brendan 	mutex_destroy(&spa_l2cache_lock);
   1465    789    ahrens }
   1466   4620    perrin 
   1467   4620    perrin /*
   1468   4620    perrin  * Return whether this pool has slogs. No locking needed.
   1469   4620    perrin  * It's not a problem if the wrong answer is returned as it's only for
   1470   4620    perrin  * performance and not correctness
   1471   4620    perrin  */
   1472   4620    perrin boolean_t
   1473   4620    perrin spa_has_slogs(spa_t *spa)
   1474   4620    perrin {
   1475   4620    perrin 	return (spa->spa_log_class->mc_rotor != NULL);
   1476   4620    perrin }
   1477   6673  eschrock 
   1478  10922      Jeff spa_log_state_t
   1479  10922      Jeff spa_get_log_state(spa_t *spa)
   1480  10922      Jeff {
   1481  10922      Jeff 	return (spa->spa_log_state);
   1482  10922      Jeff }
   1483  10922      Jeff 
   1484  10922      Jeff void
   1485  10922      Jeff spa_set_log_state(spa_t *spa, spa_log_state_t state)
   1486  10922      Jeff {
   1487  10922      Jeff 	spa->spa_log_state = state;
   1488  10922      Jeff }
   1489  10922      Jeff 
   1490   6673  eschrock boolean_t
   1491   6673  eschrock spa_is_root(spa_t *spa)
   1492   6673  eschrock {
   1493   6673  eschrock 	return (spa->spa_is_root);
   1494   6673  eschrock }
   1495   8241      Jeff 
   1496   8241      Jeff boolean_t
   1497   8241      Jeff spa_writeable(spa_t *spa)
   1498   8241      Jeff {
   1499   8241      Jeff 	return (!!(spa->spa_mode & FWRITE));
   1500   8241      Jeff }
   1501   8241      Jeff 
   1502   8241      Jeff int
   1503   8241      Jeff spa_mode(spa_t *spa)
   1504   8241      Jeff {
   1505   8241      Jeff 	return (spa->spa_mode);
   1506   8241      Jeff }
   1507  10922      Jeff 
   1508  10922      Jeff uint64_t
   1509  10922      Jeff spa_bootfs(spa_t *spa)
   1510  10922      Jeff {
   1511  10922      Jeff 	return (spa->spa_bootfs);
   1512  10922      Jeff }
   1513  10922      Jeff 
   1514  10922      Jeff uint64_t
   1515  10922      Jeff spa_delegation(spa_t *spa)
   1516  10922      Jeff {
   1517  10922      Jeff 	return (spa->spa_delegation);
   1518  10922      Jeff }
   1519  10922      Jeff 
   1520  10922      Jeff objset_t *
   1521  10922      Jeff spa_meta_objset(spa_t *spa)
   1522  10922      Jeff {
   1523  10922      Jeff 	return (spa->spa_meta_objset);
   1524  10922      Jeff }
   1525  10922      Jeff 
   1526  10922      Jeff enum zio_checksum
   1527  10922      Jeff spa_dedup_checksum(spa_t *spa)
   1528  10922      Jeff {
   1529  10922      Jeff 	return (spa->spa_dedup_checksum);
   1530  10922      Jeff }
   1531