Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21   2082  eschrock 
     22    789    ahrens /*
     23   8525      Eric  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24    789    ahrens  * Use is subject to license terms.
     25    789    ahrens  */
     26    789    ahrens 
     27    789    ahrens /*
     28    789    ahrens  * This file contains all the routines used when modifying on-disk SPA state.
     29    789    ahrens  * This includes opening, importing, destroying, exporting a pool, and syncing a
     30    789    ahrens  * pool.
     31    789    ahrens  */
     32    789    ahrens 
     33    789    ahrens #include <sys/zfs_context.h>
     34   1544  eschrock #include <sys/fm/fs/zfs.h>
     35    789    ahrens #include <sys/spa_impl.h>
     36    789    ahrens #include <sys/zio.h>
     37    789    ahrens #include <sys/zio_checksum.h>
     38    789    ahrens #include <sys/dmu.h>
     39    789    ahrens #include <sys/dmu_tx.h>
     40    789    ahrens #include <sys/zap.h>
     41    789    ahrens #include <sys/zil.h>
     42  10922      Jeff #include <sys/ddt.h>
     43    789    ahrens #include <sys/vdev_impl.h>
     44    789    ahrens #include <sys/metaslab.h>
     45  10594    George #include <sys/metaslab_impl.h>
     46    789    ahrens #include <sys/uberblock_impl.h>
     47    789    ahrens #include <sys/txg.h>
     48    789    ahrens #include <sys/avl.h>
     49    789    ahrens #include <sys/dmu_traverse.h>
     50   3912     lling #include <sys/dmu_objset.h>
     51    789    ahrens #include <sys/unique.h>
     52    789    ahrens #include <sys/dsl_pool.h>
     53   3912     lling #include <sys/dsl_dataset.h>
     54    789    ahrens #include <sys/dsl_dir.h>
     55    789    ahrens #include <sys/dsl_prop.h>
     56   3912     lling #include <sys/dsl_synctask.h>
     57    789    ahrens #include <sys/fs/zfs.h>
     58   5450   brendan #include <sys/arc.h>
     59    789    ahrens #include <sys/callb.h>
     60   3975  ek110237 #include <sys/systeminfo.h>
     61   6423   gw25295 #include <sys/spa_boot.h>
     62   9816    George #include <sys/zfs_ioctl.h>
     63   2986  ek110237 
     64   8662    Jordan #ifdef	_KERNEL
     65  11173  Jonathan #include <sys/bootprops.h>
     66  11173  Jonathan #include <sys/callb.h>
     67  11173  Jonathan #include <sys/cpupart.h>
     68  11173  Jonathan #include <sys/pool.h>
     69  11173  Jonathan #include <sys/sysdc.h>
     70   8662    Jordan #include <sys/zone.h>
     71   8662    Jordan #endif	/* _KERNEL */
     72   8662    Jordan 
     73   5094     lling #include "zfs_prop.h"
     74   5913    perrin #include "zfs_comutil.h"
     75   5094     lling 
/*
 * Selects how the thread count of a zio taskq is derived from the
 * zti_value stored next to the mode in a zio_taskq_info_t.
 * zti_nmodes is the trailing enumerator, so its value equals the number
 * of modes declared before it.
 */
typedef enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_batch,			/* cpu-intensive; value is ignored */
	zti_mode_null,			/* don't create a taskq */
	zti_nmodes
} zti_modes_t;
     83   9515  Jonathan 
/*
 * Brace initializers for zio_taskq_info_t entries: { mode, value }.
 * ZTI_BATCH and ZTI_NULL store a value of 0.
 */
#define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_BATCH	{ zti_mode_batch, 0 }
#define	ZTI_NULL	{ zti_mode_null, 0 }

/* Shorthand for a fixed-mode entry with a value of 1. */
#define	ZTI_ONE		ZTI_FIX(1)
     90   9515  Jonathan 
/*
 * Describes one zio taskq: the mode, plus a value whose meaning depends
 * on the mode (see the comments on enum zti_modes).
 */
typedef struct zio_taskq_info {
	enum zti_modes zti_mode;
	uint_t zti_value;
} zio_taskq_info_t;
     95   9515  Jonathan 
/*
 * Name of each taskq type, indexed by taskq type.  spa_create_zio_taskqs()
 * appends this to the zio type name ("<type>_<name>") to name each taskq.
 */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "issue_high", "intr", "intr_high"
};
     99   9515  Jonathan 
/*
 * Define the taskq threads for the following I/O types:
 * 	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
 *
 * The table is indexed [zio type][taskq type].  Each row is one I/O type
 * and each column one of the taskq types named in zio_taskq_types.
 * NOTE(review): the rows are presumably in the order listed above, which
 * must match the zio type enumeration -- confirm against its declaration.
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
};
    113   9515  Jonathan 
    114   5094     lling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
    115   7214     lling static boolean_t spa_has_active_shared_spare(spa_t *spa);
    116  11173  Jonathan 
/*
 * Tunables for zio taskq creation.  spa_taskq_create() uses
 * zio_taskq_batch_pct as the thread value for batch-mode taskqs, and uses
 * zio_taskq_sysdc / zio_taskq_basedc to decide whether to call
 * taskq_create_sysdc() and with which duty cycle.
 * NOTE(review): zio_taskq_psrset_bind and spa_create_process are not
 * referenced in this part of the file; their consumers should be checked
 * before changing the defaults.
 */
uint_t		zio_taskq_batch_pct = 100;	/* 1 thread per cpu in pset */
id_t		zio_taskq_psrset_bind = PS_NONE;
boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
uint_t		zio_taskq_basedc = 80;		/* base duty cycle */

boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */
    123  11173  Jonathan 
    124  11173  Jonathan /*
    125  11173  Jonathan  * This (illegal) pool name is used when temporarily importing a spa_t in order
    126  11173  Jonathan  * to get the vdev stats associated with the imported devices.
    127  11173  Jonathan  */
    128  11173  Jonathan #define	TRYIMPORT_NAME	"$import"
    129   5094     lling 
    130   5094     lling /*
    131   5094     lling  * ==========================================================================
    132   5094     lling  * SPA properties routines
    133   5094     lling  * ==========================================================================
    134   5094     lling  */
    135   5094     lling 
    136   5094     lling /*
    137   5094     lling  * Add a (source=src, propname=propval) list to an nvlist.
    138   5094     lling  */
    139   5949     lling static void
    140   5094     lling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
    141   5094     lling     uint64_t intval, zprop_source_t src)
    142   5094     lling {
    143   5094     lling 	const char *propname = zpool_prop_to_name(prop);
    144   5094     lling 	nvlist_t *propval;
    145   5949     lling 
    146   5949     lling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
    147   5949     lling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
    148   5949     lling 
    149   5949     lling 	if (strval != NULL)
    150   5949     lling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
    151   5949     lling 	else
    152   5949     lling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
    153   5949     lling 
    154   5949     lling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
    155   5094     lling 	nvlist_free(propval);
    156   5094     lling }
    157   5094     lling 
    158   5094     lling /*
    159   5094     lling  * Get property values from the spa configuration.
    160   5094     lling  */
    161   5949     lling static void
    162   5094     lling spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
    163   5094     lling {
    164   8525      Eric 	uint64_t size;
    165  10956    George 	uint64_t alloc;
    166   5094     lling 	uint64_t cap, version;
    167   5094     lling 	zprop_source_t src = ZPROP_SRC_NONE;
    168   6643  eschrock 	spa_config_dirent_t *dp;
    169   5094     lling 
    170   7754      Jeff 	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
    171   7754      Jeff 
    172   8525      Eric 	if (spa->spa_root_vdev != NULL) {
    173  10956    George 		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
    174  10922      Jeff 		size = metaslab_class_get_space(spa_normal_class(spa));
    175   8525      Eric 		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
    176   8525      Eric 		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
    177  10956    George 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
    178  10956    George 		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
    179  10956    George 		    size - alloc, src);
    180  10956    George 
    181  10956    George 		cap = (size == 0) ? 0 : (alloc * 100 / size);
    182   8525      Eric 		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
    183  10922      Jeff 
    184  10922      Jeff 		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
    185  10922      Jeff 		    ddt_get_pool_dedup_ratio(spa), src);
    186   8525      Eric 
    187   8525      Eric 		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
    188   8525      Eric 		    spa->spa_root_vdev->vdev_state, src);
    189   8525      Eric 
    190   8525      Eric 		version = spa_version(spa);
    191   8525      Eric 		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
    192   8525      Eric 			src = ZPROP_SRC_DEFAULT;
    193   8525      Eric 		else
    194   8525      Eric 			src = ZPROP_SRC_LOCAL;
    195   8525      Eric 		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
    196   8525      Eric 	}
    197   5949     lling 
    198   5949     lling 	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
    199   5949     lling 
    200   5949     lling 	if (spa->spa_root != NULL)
    201   5949     lling 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
    202   5949     lling 		    0, ZPROP_SRC_LOCAL);
    203   5094     lling 
    204   6643  eschrock 	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
    205   6643  eschrock 		if (dp->scd_path == NULL) {
    206   5949     lling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
    207   6643  eschrock 			    "none", 0, ZPROP_SRC_LOCAL);
    208   6643  eschrock 		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
    209   5949     lling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
    210   6643  eschrock 			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
    211   5363  eschrock 		}
    212   5949     lling 	}
    213   5094     lling }
    214   5094     lling 
    215   5094     lling /*
    216   5094     lling  * Get zpool property values.
    217   5094     lling  */
    218   5094     lling int
    219   5094     lling spa_prop_get(spa_t *spa, nvlist_t **nvp)
    220   5094     lling {
    221  10922      Jeff 	objset_t *mos = spa->spa_meta_objset;
    222   5094     lling 	zap_cursor_t zc;
    223   5094     lling 	zap_attribute_t za;
    224   5094     lling 	int err;
    225   5094     lling 
    226   5949     lling 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
    227   5094     lling 
    228   7754      Jeff 	mutex_enter(&spa->spa_props_lock);
    229   7754      Jeff 
    230   5094     lling 	/*
    231   5094     lling 	 * Get properties from the spa config.
    232   5094     lling 	 */
    233   5949     lling 	spa_prop_get_config(spa, nvp);
    234   5094     lling 
    235   5094     lling 	/* If no pool property object, no more prop to get. */
    236   5094     lling 	if (spa->spa_pool_props_object == 0) {
    237   5094     lling 		mutex_exit(&spa->spa_props_lock);
    238   5094     lling 		return (0);
    239   5094     lling 	}
    240   5094     lling 
    241   5094     lling 	/*
    242   5094     lling 	 * Get properties from the MOS pool property object.
    243   5094     lling 	 */
    244   5094     lling 	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
    245   5094     lling 	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
    246   5094     lling 	    zap_cursor_advance(&zc)) {
    247   5094     lling 		uint64_t intval = 0;
    248   5094     lling 		char *strval = NULL;
    249   5094     lling 		zprop_source_t src = ZPROP_SRC_DEFAULT;
    250   5094     lling 		zpool_prop_t prop;
    251   5094     lling 
    252   5094     lling 		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
    253   5094     lling 			continue;
    254   5094     lling 
    255   5094     lling 		switch (za.za_integer_length) {
    256   5094     lling 		case 8:
    257   5094     lling 			/* integer property */
    258   5094     lling 			if (za.za_first_integer !=
    259   5094     lling 			    zpool_prop_default_numeric(prop))
    260   5094     lling 				src = ZPROP_SRC_LOCAL;
    261   5094     lling 
    262   5094     lling 			if (prop == ZPOOL_PROP_BOOTFS) {
    263   5094     lling 				dsl_pool_t *dp;
    264   5094     lling 				dsl_dataset_t *ds = NULL;
    265   5094     lling 
    266   5094     lling 				dp = spa_get_dsl(spa);
    267   5094     lling 				rw_enter(&dp->dp_config_rwlock, RW_READER);
    268   6689    maybee 				if (err = dsl_dataset_hold_obj(dp,
    269   6689    maybee 				    za.za_first_integer, FTAG, &ds)) {
    270   5094     lling 					rw_exit(&dp->dp_config_rwlock);
    271   5094     lling 					break;
    272   5094     lling 				}
    273   5094     lling 
    274   5094     lling 				strval = kmem_alloc(
    275   5094     lling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
    276   5094     lling 				    KM_SLEEP);
    277   5094     lling 				dsl_dataset_name(ds, strval);
    278   6689    maybee 				dsl_dataset_rele(ds, FTAG);
    279   5094     lling 				rw_exit(&dp->dp_config_rwlock);
    280   5094     lling 			} else {
    281   5094     lling 				strval = NULL;
    282   5094     lling 				intval = za.za_first_integer;
    283   5094     lling 			}
    284   5094     lling 
    285   5949     lling 			spa_prop_add_list(*nvp, prop, strval, intval, src);
    286   5094     lling 
    287   5094     lling 			if (strval != NULL)
    288   5094     lling 				kmem_free(strval,
    289   5094     lling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
    290   5094     lling 
    291   5094     lling 			break;
    292   5094     lling 
    293   5094     lling 		case 1:
    294   5094     lling 			/* string property */
    295   5094     lling 			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
    296   5094     lling 			err = zap_lookup(mos, spa->spa_pool_props_object,
    297   5094     lling 			    za.za_name, 1, za.za_num_integers, strval);
    298   5094     lling 			if (err) {
    299   5094     lling 				kmem_free(strval, za.za_num_integers);
    300   5094     lling 				break;
    301   5094     lling 			}
    302   5949     lling 			spa_prop_add_list(*nvp, prop, strval, 0, src);
    303   5094     lling 			kmem_free(strval, za.za_num_integers);
    304   5094     lling 			break;
    305   5094     lling 
    306   5094     lling 		default:
    307   5094     lling 			break;
    308   5094     lling 		}
    309   5094     lling 	}
    310   5094     lling 	zap_cursor_fini(&zc);
    311   5094     lling 	mutex_exit(&spa->spa_props_lock);
    312   5094     lling out:
    313   5094     lling 	if (err && err != ENOENT) {
    314   5094     lling 		nvlist_free(*nvp);
    315   5949     lling 		*nvp = NULL;
    316   5094     lling 		return (err);
    317   5094     lling 	}
    318   5094     lling 
    319   5094     lling 	return (0);
    320   5094     lling }
    321   5094     lling 
/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
 *
 * Each pair in 'props' is checked according to its property; the first
 * failure stops the walk and its error is returned.  A name that is not a
 * pool property yields EINVAL.  Properties without a case in the switch
 * below are accepted without further checks.
 *
 * On success, if a bootfs property was supplied, its string value is
 * replaced in 'props' by a uint64 holding the dataset's object number (or
 * the property's numeric default when the string was empty).
 *
 * Side effect: when the pool is suspended, a valid failmode value is
 * stored directly in spa->spa_failmode and EIO is returned.
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Only the range [current version, SPA_VERSION]. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* These accept only 0 or 1. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Ask for the string -> object number rewrite after
			 * the loop; it only happens if no error is recorded.
			 */
			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* An empty value selects the numeric default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					/*
					 * Also reached when the compression
					 * lookup failed; objnum is then unused
					 * because error is non-zero.
					 */
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* The empty string and "none" need no path checks. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Anything else must be an absolute path ... */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			/* ... whose last component is not empty, "." or "..". */
			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;

		case ZPOOL_PROP_DEDUPDITTO:
			/* Either 0 or at least ZIO_DEDUPDITTO_MIN. */
			if (spa_version(spa) < SPA_VERSION_DEDUP)
				error = ENOTSUP;
			else
				error = nvpair_value_uint64(elem, &intval);
			if (error == 0 &&
			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/* Swap the bootfs dataset name for its object number. */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}
    486   5094     lling 
    487   8525      Eric void
    488   8525      Eric spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
    489   8525      Eric {
    490   8525      Eric 	char *cachefile;
    491   8525      Eric 	spa_config_dirent_t *dp;
    492   8525      Eric 
    493   8525      Eric 	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
    494   8525      Eric 	    &cachefile) != 0)
    495   8525      Eric 		return;
    496   8525      Eric 
    497   8525      Eric 	dp = kmem_alloc(sizeof (spa_config_dirent_t),
    498   8525      Eric 	    KM_SLEEP);
    499   8525      Eric 
    500   8525      Eric 	if (cachefile[0] == '\0')
    501   8525      Eric 		dp->scd_path = spa_strdup(spa_config_path);
    502   8525      Eric 	else if (strcmp(cachefile, "none") == 0)
    503   8525      Eric 		dp->scd_path = NULL;
    504   8525      Eric 	else
    505   8525      Eric 		dp->scd_path = spa_strdup(cachefile);
    506   8525      Eric 
    507   8525      Eric 	list_insert_head(&spa->spa_config_list, dp);
    508   8525      Eric 	if (need_sync)
    509   8525      Eric 		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
    510   8525      Eric }
    511   8525      Eric 
    512   5094     lling int
    513   5094     lling spa_prop_set(spa_t *spa, nvlist_t *nvp)
    514   5094     lling {
    515   5094     lling 	int error;
    516   8525      Eric 	nvpair_t *elem;
    517   8525      Eric 	boolean_t need_sync = B_FALSE;
    518   8525      Eric 	zpool_prop_t prop;
    519   5094     lling 
    520   5094     lling 	if ((error = spa_prop_validate(spa, nvp)) != 0)
    521   5094     lling 		return (error);
    522   5094     lling 
    523   8525      Eric 	elem = NULL;
    524   8525      Eric 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
    525   8525      Eric 		if ((prop = zpool_name_to_prop(
    526   8525      Eric 		    nvpair_name(elem))) == ZPROP_INVAL)
    527   8525      Eric 			return (EINVAL);
    528   8525      Eric 
    529   8525      Eric 		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
    530   8525      Eric 			continue;
    531   8525      Eric 
    532   8525      Eric 		need_sync = B_TRUE;
    533   8525      Eric 		break;
    534   8525      Eric 	}
    535   8525      Eric 
    536   8525      Eric 	if (need_sync)
    537   8525      Eric 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
    538   8525      Eric 		    spa, nvp, 3));
    539   8525      Eric 	else
    540   8525      Eric 		return (0);
    541   5094     lling }
    542   5094     lling 
    543   5094     lling /*
    544   5094     lling  * If the bootfs property value is dsobj, clear it.
    545   5094     lling  */
    546   5094     lling void
    547   5094     lling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
    548   5094     lling {
    549   5094     lling 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
    550   5094     lling 		VERIFY(zap_remove(spa->spa_meta_objset,
    551   5094     lling 		    spa->spa_pool_props_object,
    552   5094     lling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
    553   5094     lling 		spa->spa_bootfs = 0;
    554   5094     lling 	}
    555   5094     lling }
    556    789    ahrens 
    557    789    ahrens /*
    558    789    ahrens  * ==========================================================================
    559    789    ahrens  * SPA state manipulation (open/create/destroy/import/export)
    560    789    ahrens  * ==========================================================================
    561    789    ahrens  */
    562    789    ahrens 
    563   1544  eschrock static int
    564   1544  eschrock spa_error_entry_compare(const void *a, const void *b)
    565   1544  eschrock {
    566   1544  eschrock 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
    567   1544  eschrock 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
    568   1544  eschrock 	int ret;
    569   1544  eschrock 
    570   1544  eschrock 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
    571   1544  eschrock 	    sizeof (zbookmark_t));
    572   1544  eschrock 
    573   1544  eschrock 	if (ret < 0)
    574   1544  eschrock 		return (-1);
    575   1544  eschrock 	else if (ret > 0)
    576   1544  eschrock 		return (1);
    577   1544  eschrock 	else
    578   1544  eschrock 		return (0);
    579   1544  eschrock }
    580   1544  eschrock 
    581   1544  eschrock /*
    582   1544  eschrock  * Utility function which retrieves copies of the current logs and
    583   1544  eschrock  * re-initializes them in the process.
    584   1544  eschrock  */
    585   1544  eschrock void
    586   1544  eschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
    587   1544  eschrock {
    588   1544  eschrock 	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
    589   1544  eschrock 
    590   1544  eschrock 	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
    591   1544  eschrock 	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
    592   1544  eschrock 
    593   1544  eschrock 	avl_create(&spa->spa_errlist_scrub,
    594   1544  eschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    595   1544  eschrock 	    offsetof(spa_error_entry_t, se_avl));
    596   1544  eschrock 	avl_create(&spa->spa_errlist_last,
    597   1544  eschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    598   1544  eschrock 	    offsetof(spa_error_entry_t, se_avl));
    599   1544  eschrock }
    600   1544  eschrock 
    601  11173  Jonathan static taskq_t *
    602  11173  Jonathan spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
    603  11173  Jonathan     uint_t value)
    604  11173  Jonathan {
    605  11173  Jonathan 	uint_t flags = TASKQ_PREPOPULATE;
    606  11173  Jonathan 	boolean_t batch = B_FALSE;
    607  11173  Jonathan 
    608  11173  Jonathan 	switch (mode) {
    609  11173  Jonathan 	case zti_mode_null:
    610  11173  Jonathan 		return (NULL);		/* no taskq needed */
    611  11173  Jonathan 
    612  11173  Jonathan 	case zti_mode_fixed:
    613  11173  Jonathan 		ASSERT3U(value, >=, 1);
    614  11173  Jonathan 		value = MAX(value, 1);
    615  11173  Jonathan 		break;
    616  11173  Jonathan 
    617  11173  Jonathan 	case zti_mode_batch:
    618  11173  Jonathan 		batch = B_TRUE;
    619  11173  Jonathan 		flags |= TASKQ_THREADS_CPU_PCT;
    620  11173  Jonathan 		value = zio_taskq_batch_pct;
    621  11173  Jonathan 		break;
    622  11173  Jonathan 
    623  11173  Jonathan 	case zti_mode_online_percent:
    624  11173  Jonathan 		flags |= TASKQ_THREADS_CPU_PCT;
    625  11173  Jonathan 		break;
    626  11173  Jonathan 
    627  11173  Jonathan 	default:
    628  11173  Jonathan 		panic("unrecognized mode for %s taskq (%u:%u) in "
    629  11173  Jonathan 		    "spa_activate()",
    630  11173  Jonathan 		    name, mode, value);
    631  11173  Jonathan 		break;
    632  11173  Jonathan 	}
    633  11173  Jonathan 
    634  11173  Jonathan 	if (zio_taskq_sysdc && spa->spa_proc != &p0) {
    635  11173  Jonathan 		if (batch)
    636  11173  Jonathan 			flags |= TASKQ_DC_BATCH;
    637  11173  Jonathan 
    638  11173  Jonathan 		return (taskq_create_sysdc(name, value, 50, INT_MAX,
    639  11173  Jonathan 		    spa->spa_proc, zio_taskq_basedc, flags));
    640  11173  Jonathan 	}
    641  11173  Jonathan 	return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
    642  11173  Jonathan 	    spa->spa_proc, flags));
    643  11173  Jonathan }
    644  11173  Jonathan 
    645  11173  Jonathan static void
    646  11173  Jonathan spa_create_zio_taskqs(spa_t *spa)
    647  11173  Jonathan {
    648   7754      Jeff 	for (int t = 0; t < ZIO_TYPES; t++) {
    649   7754      Jeff 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
    650  11146    George 			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
    651  11146    George 			enum zti_modes mode = ztip->zti_mode;
    652  11146    George 			uint_t value = ztip->zti_value;
    653   9515  Jonathan 			char name[32];
    654   9515  Jonathan 
    655   9515  Jonathan 			(void) snprintf(name, sizeof (name),
    656  11146    George 			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
    657   9515  Jonathan 
    658  11173  Jonathan 			spa->spa_zio_taskq[t][q] =
    659  11173  Jonathan 			    spa_taskq_create(spa, name, mode, value);
    660  11173  Jonathan 		}
    661  11173  Jonathan 	}
    662  11173  Jonathan }
    663  11173  Jonathan 
    664  11173  Jonathan #ifdef _KERNEL
    665  11173  Jonathan static void
    666  11173  Jonathan spa_thread(void *arg)
    667  11173  Jonathan {
    668  11173  Jonathan 	callb_cpr_t cprinfo;
    669  11173  Jonathan 
    670  11173  Jonathan 	spa_t *spa = arg;
    671  11173  Jonathan 	user_t *pu = PTOU(curproc);
    672  11173  Jonathan 
    673  11173  Jonathan 	CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
    674  11173  Jonathan 	    spa->spa_name);
    675  11173  Jonathan 
    676  11173  Jonathan 	ASSERT(curproc != &p0);
    677  11173  Jonathan 	(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
    678  11173  Jonathan 	    "zpool-%s", spa->spa_name);
    679  11173  Jonathan 	(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));
    680  11173  Jonathan 
    681  11173  Jonathan 	/* bind this thread to the requested psrset */
    682  11173  Jonathan 	if (zio_taskq_psrset_bind != PS_NONE) {
    683  11173  Jonathan 		pool_lock();
    684  11173  Jonathan 		mutex_enter(&cpu_lock);
    685  11173  Jonathan 		mutex_enter(&pidlock);
    686  11173  Jonathan 		mutex_enter(&curproc->p_lock);
    687  11173  Jonathan 
    688  11173  Jonathan 		if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
    689  11173  Jonathan 		    0, NULL, NULL) == 0)  {
    690  11173  Jonathan 			curthread->t_bind_pset = zio_taskq_psrset_bind;
    691  11173  Jonathan 		} else {
    692  11173  Jonathan 			cmn_err(CE_WARN,
    693  11173  Jonathan 			    "Couldn't bind process for zfs pool \"%s\" to "
    694  11173  Jonathan 			    "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
    695  11173  Jonathan 		}
    696  11173  Jonathan 
    697  11173  Jonathan 		mutex_exit(&curproc->p_lock);
    698  11173  Jonathan 		mutex_exit(&pidlock);
    699  11173  Jonathan 		mutex_exit(&cpu_lock);
    700  11173  Jonathan 		pool_unlock();
    701  11173  Jonathan 	}
    702  11173  Jonathan 
    703  11173  Jonathan 	if (zio_taskq_sysdc) {
    704  11173  Jonathan 		sysdc_thread_enter(curthread, 100, 0);
    705  11173  Jonathan 	}
    706  11173  Jonathan 
    707  11173  Jonathan 	spa->spa_proc = curproc;
    708  11173  Jonathan 	spa->spa_did = curthread->t_did;
    709  11173  Jonathan 
    710  11173  Jonathan 	spa_create_zio_taskqs(spa);
    711  11173  Jonathan 
    712  11173  Jonathan 	mutex_enter(&spa->spa_proc_lock);
    713  11173  Jonathan 	ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);
    714  11173  Jonathan 
    715  11173  Jonathan 	spa->spa_proc_state = SPA_PROC_ACTIVE;
    716  11173  Jonathan 	cv_broadcast(&spa->spa_proc_cv);
    717  11173  Jonathan 
    718  11173  Jonathan 	CALLB_CPR_SAFE_BEGIN(&cprinfo);
    719  11173  Jonathan 	while (spa->spa_proc_state == SPA_PROC_ACTIVE)
    720  11173  Jonathan 		cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
    721  11173  Jonathan 	CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);
    722  11173  Jonathan 
    723  11173  Jonathan 	ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
    724  11173  Jonathan 	spa->spa_proc_state = SPA_PROC_GONE;
    725  11173  Jonathan 	spa->spa_proc = &p0;
    726  11173  Jonathan 	cv_broadcast(&spa->spa_proc_cv);
    727  11173  Jonathan 	CALLB_CPR_EXIT(&cprinfo);	/* drops spa_proc_lock */
    728  11173  Jonathan 
    729  11173  Jonathan 	mutex_enter(&curproc->p_lock);
    730  11173  Jonathan 	lwp_exit();
    731  11173  Jonathan }
    732  11173  Jonathan #endif
    733  11173  Jonathan 
    734  11173  Jonathan /*
    735  11173  Jonathan  * Activate an uninitialized pool.
    736  11173  Jonathan  */
    737  11173  Jonathan static void
    738  11173  Jonathan spa_activate(spa_t *spa, int mode)
    739  11173  Jonathan {
    740  11173  Jonathan 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
    741  11173  Jonathan 
    742  11173  Jonathan 	spa->spa_state = POOL_STATE_ACTIVE;
    743  11173  Jonathan 	spa->spa_mode = mode;
    744  11173  Jonathan 
    745  11173  Jonathan 	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
    746  11173  Jonathan 	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
    747  11173  Jonathan 
    748  11173  Jonathan 	/* Try to create a covering process */
    749  11173  Jonathan 	mutex_enter(&spa->spa_proc_lock);
    750  11173  Jonathan 	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
    751  11173  Jonathan 	ASSERT(spa->spa_proc == &p0);
    752  11173  Jonathan 	spa->spa_did = 0;
    753  11173  Jonathan 
    754  11173  Jonathan 	/* Only create a process if we're going to be around a while. */
    755  11173  Jonathan 	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
    756  11173  Jonathan 		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
    757  11173  Jonathan 		    NULL, 0) == 0) {
    758  11173  Jonathan 			spa->spa_proc_state = SPA_PROC_CREATED;
    759  11173  Jonathan 			while (spa->spa_proc_state == SPA_PROC_CREATED) {
    760  11173  Jonathan 				cv_wait(&spa->spa_proc_cv,
    761  11173  Jonathan 				    &spa->spa_proc_lock);
    762  11173  Jonathan 			}
    763  11173  Jonathan 			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
    764  11173  Jonathan 			ASSERT(spa->spa_proc != &p0);
    765  11173  Jonathan 			ASSERT(spa->spa_did != 0);
    766  11173  Jonathan 		} else {
    767  11173  Jonathan #ifdef _KERNEL
    768  11173  Jonathan 			cmn_err(CE_WARN,
    769  11173  Jonathan 			    "Couldn't create process for zfs pool \"%s\"\n",
    770  11173  Jonathan 			    spa->spa_name);
    771  11173  Jonathan #endif
    772  11173  Jonathan 		}
    773  11173  Jonathan 	}
    774  11173  Jonathan 	mutex_exit(&spa->spa_proc_lock);
    775  11173  Jonathan 
    776  11173  Jonathan 	/* If we didn't create a process, we need to create our taskqs. */
    777  11173  Jonathan 	if (spa->spa_proc == &p0) {
    778  11173  Jonathan 		spa_create_zio_taskqs(spa);
    779   7754      Jeff 	}
    780   7754      Jeff 
    781   7754      Jeff 	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
    782   7754      Jeff 	    offsetof(vdev_t, vdev_config_dirty_node));
    783   7754      Jeff 	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
    784   7754      Jeff 	    offsetof(vdev_t, vdev_state_dirty_node));
    785    789    ahrens 
    786    789    ahrens 	txg_list_create(&spa->spa_vdev_txg_list,
    787    789    ahrens 	    offsetof(struct vdev, vdev_txg_node));
    788   1544  eschrock 
    789   1544  eschrock 	avl_create(&spa->spa_errlist_scrub,
    790   1544  eschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    791   1544  eschrock 	    offsetof(spa_error_entry_t, se_avl));
    792   1544  eschrock 	avl_create(&spa->spa_errlist_last,
    793   1544  eschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
    794   1544  eschrock 	    offsetof(spa_error_entry_t, se_avl));
    795    789    ahrens }
    796    789    ahrens 
    797    789    ahrens /*
    798    789    ahrens  * Opposite of spa_activate().
    799    789    ahrens  */
    800    789    ahrens static void
    801    789    ahrens spa_deactivate(spa_t *spa)
    802    789    ahrens {
    803    789    ahrens 	ASSERT(spa->spa_sync_on == B_FALSE);
    804    789    ahrens 	ASSERT(spa->spa_dsl_pool == NULL);
    805    789    ahrens 	ASSERT(spa->spa_root_vdev == NULL);
    806   9630      Jeff 	ASSERT(spa->spa_async_zio_root == NULL);
    807    789    ahrens 	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
    808    789    ahrens 
    809    789    ahrens 	txg_list_destroy(&spa->spa_vdev_txg_list);
    810    789    ahrens 
    811   7754      Jeff 	list_destroy(&spa->spa_config_dirty_list);
    812   7754      Jeff 	list_destroy(&spa->spa_state_dirty_list);
    813   7754      Jeff 
    814   7754      Jeff 	for (int t = 0; t < ZIO_TYPES; t++) {
    815   7754      Jeff 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
    816  11146    George 			if (spa->spa_zio_taskq[t][q] != NULL)
    817  11146    George 				taskq_destroy(spa->spa_zio_taskq[t][q]);
    818   7754      Jeff 			spa->spa_zio_taskq[t][q] = NULL;
    819   7754      Jeff 		}
    820    789    ahrens 	}
    821    789    ahrens 
    822    789    ahrens 	metaslab_class_destroy(spa->spa_normal_class);
    823    789    ahrens 	spa->spa_normal_class = NULL;
    824   4527    perrin 
    825   4527    perrin 	metaslab_class_destroy(spa->spa_log_class);
    826   4527    perrin 	spa->spa_log_class = NULL;
    827   1544  eschrock 
    828   1544  eschrock 	/*
    829   1544  eschrock 	 * If this was part of an import or the open otherwise failed, we may
    830   1544  eschrock 	 * still have errors left in the queues.  Empty them just in case.
    831   1544  eschrock 	 */
    832   1544  eschrock 	spa_errlog_drain(spa);
    833   1544  eschrock 
    834   1544  eschrock 	avl_destroy(&spa->spa_errlist_scrub);
    835   1544  eschrock 	avl_destroy(&spa->spa_errlist_last);
    836    789    ahrens 
    837    789    ahrens 	spa->spa_state = POOL_STATE_UNINITIALIZED;
    838  11173  Jonathan 
    839  11173  Jonathan 	mutex_enter(&spa->spa_proc_lock);
    840  11173  Jonathan 	if (spa->spa_proc_state != SPA_PROC_NONE) {
    841  11173  Jonathan 		ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
    842  11173  Jonathan 		spa->spa_proc_state = SPA_PROC_DEACTIVATE;
    843  11173  Jonathan 		cv_broadcast(&spa->spa_proc_cv);
    844  11173  Jonathan 		while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
    845  11173  Jonathan 			ASSERT(spa->spa_proc != &p0);
    846  11173  Jonathan 			cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
    847  11173  Jonathan 		}
    848  11173  Jonathan 		ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
    849  11173  Jonathan 		spa->spa_proc_state = SPA_PROC_NONE;
    850  11173  Jonathan 	}
    851  11173  Jonathan 	ASSERT(spa->spa_proc == &p0);
    852  11173  Jonathan 	mutex_exit(&spa->spa_proc_lock);
    853  11173  Jonathan 
    854  11173  Jonathan 	/*
    855  11173  Jonathan 	 * We want to make sure spa_thread() has actually exited the ZFS
    856  11173  Jonathan 	 * module, so that the module can't be unloaded out from underneath
    857  11173  Jonathan 	 * it.
    858  11173  Jonathan 	 */
    859  11173  Jonathan 	if (spa->spa_did != 0) {
    860  11173  Jonathan 		thread_join(spa->spa_did);
    861  11173  Jonathan 		spa->spa_did = 0;
    862  11173  Jonathan 	}
    863    789    ahrens }
    864    789    ahrens 
    865    789    ahrens /*
    866    789    ahrens  * Verify a pool configuration, and construct the vdev tree appropriately.  This
    867    789    ahrens  * will create all the necessary vdevs in the appropriate layout, with each vdev
    868    789    ahrens  * in the CLOSED state.  This will prep the pool before open/creation/import.
    869    789    ahrens  * All vdev validation is done by the vdev_alloc() routine.
    870    789    ahrens  */
    871   2082  eschrock static int
    872   2082  eschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    873   2082  eschrock     uint_t id, int atype)
    874    789    ahrens {
    875    789    ahrens 	nvlist_t **child;
    876   9816    George 	uint_t children;
    877   2082  eschrock 	int error;
    878    789    ahrens 
    879   2082  eschrock 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
    880   2082  eschrock 		return (error);
    881    789    ahrens 
    882   2082  eschrock 	if ((*vdp)->vdev_ops->vdev_op_leaf)
    883   2082  eschrock 		return (0);
    884    789    ahrens 
    885   7754      Jeff 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
    886   7754      Jeff 	    &child, &children);
    887   7754      Jeff 
    888   7754      Jeff 	if (error == ENOENT)
    889   7754      Jeff 		return (0);
    890   7754      Jeff 
    891   7754      Jeff 	if (error) {
    892   2082  eschrock 		vdev_free(*vdp);
    893   2082  eschrock 		*vdp = NULL;
    894   2082  eschrock 		return (EINVAL);
    895    789    ahrens 	}
    896    789    ahrens 
    897   9816    George 	for (int c = 0; c < children; c++) {
    898   2082  eschrock 		vdev_t *vd;
    899   2082  eschrock 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
    900   2082  eschrock 		    atype)) != 0) {
    901   2082  eschrock 			vdev_free(*vdp);
    902   2082  eschrock 			*vdp = NULL;
    903   2082  eschrock 			return (error);
    904    789    ahrens 		}
    905    789    ahrens 	}
    906    789    ahrens 
    907   2082  eschrock 	ASSERT(*vdp != NULL);
    908   2082  eschrock 
    909   2082  eschrock 	return (0);
    910    789    ahrens }
    911    789    ahrens 
    912    789    ahrens /*
    913    789    ahrens  * Opposite of spa_load().
    914    789    ahrens  */
    915    789    ahrens static void
    916    789    ahrens spa_unload(spa_t *spa)
    917    789    ahrens {
    918   2082  eschrock 	int i;
    919   2082  eschrock 
    920   7754      Jeff 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
    921   7754      Jeff 
    922    789    ahrens 	/*
    923   1544  eschrock 	 * Stop async tasks.
    924   1544  eschrock 	 */
    925   1544  eschrock 	spa_async_suspend(spa);
    926   1544  eschrock 
    927   1544  eschrock 	/*
    928    789    ahrens 	 * Stop syncing.
    929    789    ahrens 	 */
    930    789    ahrens 	if (spa->spa_sync_on) {
    931    789    ahrens 		txg_sync_stop(spa->spa_dsl_pool);
    932    789    ahrens 		spa->spa_sync_on = B_FALSE;
    933    789    ahrens 	}
    934    789    ahrens 
    935    789    ahrens 	/*
    936   7754      Jeff 	 * Wait for any outstanding async I/O to complete.
    937   7754      Jeff 	 */
    938   9234    George 	if (spa->spa_async_zio_root != NULL) {
    939   9234    George 		(void) zio_wait(spa->spa_async_zio_root);
    940   9234    George 		spa->spa_async_zio_root = NULL;
    941   9234    George 	}
    942    789    ahrens 
    943    789    ahrens 	/*
    944    789    ahrens 	 * Close the dsl pool.
    945    789    ahrens 	 */
    946    789    ahrens 	if (spa->spa_dsl_pool) {
    947    789    ahrens 		dsl_pool_close(spa->spa_dsl_pool);
    948    789    ahrens 		spa->spa_dsl_pool = NULL;
    949    789    ahrens 	}
    950  10922      Jeff 
    951  10922      Jeff 	ddt_unload(spa);
    952   8241      Jeff 
    953   8241      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
    954   8241      Jeff 
    955   8241      Jeff 	/*
    956   8241      Jeff 	 * Drop and purge level 2 cache
    957   8241      Jeff 	 */
    958   8241      Jeff 	spa_l2cache_drop(spa);
    959    789    ahrens 
    960    789    ahrens 	/*
    961    789    ahrens 	 * Close all vdevs.
    962    789    ahrens 	 */
    963   1585   bonwick 	if (spa->spa_root_vdev)
    964    789    ahrens 		vdev_free(spa->spa_root_vdev);
    965   1585   bonwick 	ASSERT(spa->spa_root_vdev == NULL);
    966   1544  eschrock 
    967   5450   brendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
    968   5450   brendan 		vdev_free(spa->spa_spares.sav_vdevs[i]);
    969   5450   brendan 	if (spa->spa_spares.sav_vdevs) {
    970   5450   brendan 		kmem_free(spa->spa_spares.sav_vdevs,
    971   5450   brendan 		    spa->spa_spares.sav_count * sizeof (void *));
    972   5450   brendan 		spa->spa_spares.sav_vdevs = NULL;
    973   5450   brendan 	}
    974   5450   brendan 	if (spa->spa_spares.sav_config) {
    975   5450   brendan 		nvlist_free(spa->spa_spares.sav_config);
    976   5450   brendan 		spa->spa_spares.sav_config = NULL;
    977   5450   brendan 	}
    978   7377      Eric 	spa->spa_spares.sav_count = 0;
    979   5450   brendan 
    980   5450   brendan 	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
    981   5450   brendan 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
    982   5450   brendan 	if (spa->spa_l2cache.sav_vdevs) {
    983   5450   brendan 		kmem_free(spa->spa_l2cache.sav_vdevs,
    984   5450   brendan 		    spa->spa_l2cache.sav_count * sizeof (void *));
    985   5450   brendan 		spa->spa_l2cache.sav_vdevs = NULL;
    986   5450   brendan 	}
    987   5450   brendan 	if (spa->spa_l2cache.sav_config) {
    988   5450   brendan 		nvlist_free(spa->spa_l2cache.sav_config);
    989   5450   brendan 		spa->spa_l2cache.sav_config = NULL;
    990   2082  eschrock 	}
    991   7377      Eric 	spa->spa_l2cache.sav_count = 0;
    992   2082  eschrock 
    993   1544  eschrock 	spa->spa_async_suspended = 0;
    994   8241      Jeff 
    995   8241      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
    996   2082  eschrock }
    997   2082  eschrock 
    998   2082  eschrock /*
    999   2082  eschrock  * Load (or re-load) the current list of vdevs describing the active spares for
   1000   2082  eschrock  * this pool.  When this is called, we have some form of basic information in
   1001   5450   brendan  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
   1002   5450   brendan  * then re-generate a more complete list including status information.
   1003   2082  eschrock  */
   1004   2082  eschrock static void
   1005   2082  eschrock spa_load_spares(spa_t *spa)
   1006   2082  eschrock {
   1007   2082  eschrock 	nvlist_t **spares;
   1008   2082  eschrock 	uint_t nspares;
   1009   2082  eschrock 	int i;
   1010   3377  eschrock 	vdev_t *vd, *tvd;
   1011   7754      Jeff 
   1012   7754      Jeff 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
   1013   2082  eschrock 
   1014   2082  eschrock 	/*
   1015   2082  eschrock 	 * First, close and free any existing spare vdevs.
   1016   2082  eschrock 	 */
   1017   5450   brendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
   1018   5450   brendan 		vd = spa->spa_spares.sav_vdevs[i];
   1019   3377  eschrock 
   1020   3377  eschrock 		/* Undo the call to spa_activate() below */
   1021   6643  eschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
   1022   6643  eschrock 		    B_FALSE)) != NULL && tvd->vdev_isspare)
   1023   3377  eschrock 			spa_spare_remove(tvd);
   1024   3377  eschrock 		vdev_close(vd);
   1025   3377  eschrock 		vdev_free(vd);
   1026   2082  eschrock 	}
   1027   3377  eschrock 
   1028   5450   brendan 	if (spa->spa_spares.sav_vdevs)
   1029   5450   brendan 		kmem_free(spa->spa_spares.sav_vdevs,
   1030   5450   brendan 		    spa->spa_spares.sav_count * sizeof (void *));
   1031   5450   brendan 
   1032   5450   brendan 	if (spa->spa_spares.sav_config == NULL)
   1033   2082  eschrock 		nspares = 0;
   1034   2082  eschrock 	else
   1035   5450   brendan 		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
   1036   2082  eschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
   1037   2082  eschrock 
   1038   5450   brendan 	spa->spa_spares.sav_count = (int)nspares;
   1039   5450   brendan 	spa->spa_spares.sav_vdevs = NULL;
   1040   2082  eschrock 
   1041   2082  eschrock 	if (nspares == 0)
   1042   2082  eschrock 		return;
   1043   2082  eschrock 
   1044   2082  eschrock 	/*
   1045   2082  eschrock 	 * Construct the array of vdevs, opening them to get status in the
   1046   3377  eschrock 	 * process.   For each spare, there is potentially two different vdev_t
   1047   3377  eschrock 	 * structures associated with it: one in the list of spares (used only
   1048   3377  eschrock 	 * for basic validation purposes) and one in the active vdev
   1049   3377  eschrock 	 * configuration (if it's spared in).  During this phase we open and
   1050   3377  eschrock 	 * validate each vdev on the spare list.  If the vdev also exists in the
   1051   3377  eschrock 	 * active configuration, then we also mark this vdev as an active spare.
   1052   2082  eschrock 	 */
   1053   5450   brendan 	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
   1054   5450   brendan 	    KM_SLEEP);
   1055   5450   brendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
   1056   2082  eschrock 		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
   1057   2082  eschrock 		    VDEV_ALLOC_SPARE) == 0);
   1058   2082  eschrock 		ASSERT(vd != NULL);
   1059   2082  eschrock 
   1060   5450   brendan 		spa->spa_spares.sav_vdevs[i] = vd;
   1061   3377  eschrock 
   1062   6643  eschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
   1063   6643  eschrock 		    B_FALSE)) != NULL) {
   1064   3377  eschrock 			if (!tvd->vdev_isspare)
   1065   3377  eschrock 				spa_spare_add(tvd);
   1066   3377  eschrock 
   1067   3377  eschrock 			/*
   1068   3377  eschrock 			 * We only mark the spare active if we were successfully
   1069   3377  eschrock 			 * able to load the vdev.  Otherwise, importing a pool
   1070   3377  eschrock 			 * with a bad active spare would result in strange
   1071   3377  eschrock 			 * behavior, because multiple pool would think the spare
   1072   3377  eschrock 			 * is actively in use.
   1073   3377  eschrock 			 *
   1074   3377  eschrock 			 * There is a vulnerability here to an equally bizarre
   1075   3377  eschrock 			 * circumstance, where a dead active spare is later
   1076   3377  eschrock 			 * brought back to life (onlined or otherwise).  Given
   1077   3377  eschrock 			 * the rarity of this scenario, and the extra complexity
   1078   3377  eschrock 			 * it adds, we ignore the possibility.
   1079   3377  eschrock 			 */
   1080   3377  eschrock 			if (!vdev_is_dead(tvd))
   1081   3377  eschrock 				spa_spare_activate(tvd);
   1082   3377  eschrock 		}
   1083   2082  eschrock 
   1084   7754      Jeff 		vd->vdev_top = vd;
   1085   9425      Eric 		vd->vdev_aux = &spa->spa_spares;
   1086   7754      Jeff 
   1087   2082  eschrock 		if (vdev_open(vd) != 0)
   1088   2082  eschrock 			continue;
   1089   2082  eschrock 
   1090   5450   brendan 		if (vdev_validate_aux(vd) == 0)
   1091   5450   brendan 			spa_spare_add(vd);
   1092   2082  eschrock 	}
   1093   2082  eschrock 
   1094   2082  eschrock 	/*
   1095   2082  eschrock 	 * Recompute the stashed list of spares, with status information
   1096   2082  eschrock 	 * this time.
   1097   2082  eschrock 	 */
   1098   5450   brendan 	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
   1099   2082  eschrock 	    DATA_TYPE_NVLIST_ARRAY) == 0);
   1100   2082  eschrock 
   1101   5450   brendan 	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
   1102   5450   brendan 	    KM_SLEEP);
   1103   5450   brendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
   1104   5450   brendan 		spares[i] = vdev_config_generate(spa,
   1105   5450   brendan 		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
   1106   5450   brendan 	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
   1107   5450   brendan 	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
   1108   5450   brendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
   1109   2082  eschrock 		nvlist_free(spares[i]);
   1110   5450   brendan 	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
   1111   5450   brendan }
   1112   5450   brendan 
   1113   5450   brendan /*
   1114   5450   brendan  * Load (or re-load) the current list of vdevs describing the active l2cache for
   1115   5450   brendan  * this pool.  When this is called, we have some form of basic information in
   1116   5450   brendan  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
   1117   5450   brendan  * then re-generate a more complete list including status information.
   1118   5450   brendan  * Devices which are already active have their details maintained, and are
   1119   5450   brendan  * not re-opened.
   1120   5450   brendan  */
   1121   5450   brendan static void
   1122   5450   brendan spa_load_l2cache(spa_t *spa)
   1123   5450   brendan {
   1124   5450   brendan 	nvlist_t **l2cache;
   1125   5450   brendan 	uint_t nl2cache;
   1126   5450   brendan 	int i, j, oldnvdevs;
   1127   9816    George 	uint64_t guid;
   1128   5450   brendan 	vdev_t *vd, **oldvdevs, **newvdevs;
   1129   5450   brendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
   1130   5450   brendan 
   1131   7754      Jeff 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
   1132   7754      Jeff 
   1133   5450   brendan 	if (sav->sav_config != NULL) {
   1134   5450   brendan 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
   1135   5450   brendan 		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
   1136   5450   brendan 		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
   1137   5450   brendan 	} else {
   1138   5450   brendan 		nl2cache = 0;
   1139   5450   brendan 	}
   1140   5450   brendan 
   1141   5450   brendan 	oldvdevs = sav->sav_vdevs;
   1142   5450   brendan 	oldnvdevs = sav->sav_count;
   1143   5450   brendan 	sav->sav_vdevs = NULL;
   1144   5450   brendan 	sav->sav_count = 0;
   1145   5450   brendan 
   1146   5450   brendan 	/*
   1147   5450   brendan 	 * Process new nvlist of vdevs.
   1148   5450   brendan 	 */
   1149   5450   brendan 	for (i = 0; i < nl2cache; i++) {
   1150   5450   brendan 		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
   1151   5450   brendan 		    &guid) == 0);
   1152   5450   brendan 
   1153   5450   brendan 		newvdevs[i] = NULL;
   1154   5450   brendan 		for (j = 0; j < oldnvdevs; j++) {
   1155   5450   brendan 			vd = oldvdevs[j];
   1156   5450   brendan 			if (vd != NULL && guid == vd->vdev_guid) {
   1157   5450   brendan 				/*
   1158   5450   brendan 				 * Retain previous vdev for add/remove ops.
   1159   5450   brendan 				 */
   1160   5450   brendan 				newvdevs[i] = vd;
   1161   5450   brendan 				oldvdevs[j] = NULL;
   1162   5450   brendan 				break;
   1163   5450   brendan 			}
   1164   5450   brendan 		}
   1165   5450   brendan 
   1166   5450   brendan 		if (newvdevs[i] == NULL) {
   1167   5450   brendan 			/*
   1168   5450   brendan 			 * Create new vdev
   1169   5450   brendan 			 */
   1170   5450   brendan 			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
   1171   5450   brendan 			    VDEV_ALLOC_L2CACHE) == 0);
   1172   5450   brendan 			ASSERT(vd != NULL);
   1173   5450   brendan 			newvdevs[i] = vd;
   1174   5450   brendan 
   1175   5450   brendan 			/*
   1176   5450   brendan 			 * Commit this vdev as an l2cache device,
   1177   5450   brendan 			 * even if it fails to open.
   1178   5450   brendan 			 */
   1179   5450   brendan 			spa_l2cache_add(vd);
   1180   5450   brendan 
   1181   6643  eschrock 			vd->vdev_top = vd;
   1182   6643  eschrock 			vd->vdev_aux = sav;
   1183   6643  eschrock 
   1184   6643  eschrock 			spa_l2cache_activate(vd);
   1185   6643  eschrock 
   1186   5450   brendan 			if (vdev_open(vd) != 0)
   1187   5450   brendan 				continue;
   1188   5450   brendan 
   1189   5450   brendan 			(void) vdev_validate_aux(vd);
   1190   5450   brendan 
   1191   9816    George 			if (!vdev_is_dead(vd))
   1192   9816    George 				l2arc_add_vdev(spa, vd);
   1193   5450   brendan 		}
   1194   5450   brendan 	}
   1195   5450   brendan 
   1196   5450   brendan 	/*
   1197   5450   brendan 	 * Purge vdevs that were dropped
   1198   5450   brendan 	 */
   1199   5450   brendan 	for (i = 0; i < oldnvdevs; i++) {
   1200   5450   brendan 		uint64_t pool;
   1201   5450   brendan 
   1202   5450   brendan 		vd = oldvdevs[i];
   1203   5450   brendan 		if (vd != NULL) {
   1204   8241      Jeff 			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
   1205   8241      Jeff 			    pool != 0ULL && l2arc_vdev_present(vd))
   1206   5450   brendan 				l2arc_remove_vdev(vd);
   1207   5450   brendan 			(void) vdev_close(vd);
   1208   5450   brendan 			spa_l2cache_remove(vd);
   1209   5450   brendan 		}
   1210   5450   brendan 	}
   1211   5450   brendan 
   1212   5450   brendan 	if (oldvdevs)
   1213   5450   brendan 		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
   1214   5450   brendan 
   1215   5450   brendan 	if (sav->sav_config == NULL)
   1216   5450   brendan 		goto out;
   1217   5450   brendan 
   1218   5450   brendan 	sav->sav_vdevs = newvdevs;
   1219   5450   brendan 	sav->sav_count = (int)nl2cache;
   1220   5450   brendan 
   1221   5450   brendan 	/*
   1222   5450   brendan 	 * Recompute the stashed list of l2cache devices, with status
   1223   5450   brendan 	 * information this time.
   1224   5450   brendan 	 */
   1225   5450   brendan 	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
   1226   5450   brendan 	    DATA_TYPE_NVLIST_ARRAY) == 0);
   1227   5450   brendan 
   1228   5450   brendan 	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
   1229   5450   brendan 	for (i = 0; i < sav->sav_count; i++)
   1230   5450   brendan 		l2cache[i] = vdev_config_generate(spa,
   1231   5450   brendan 		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
   1232   5450   brendan 	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
   1233   5450   brendan 	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
   1234   5450   brendan out:
   1235   5450   brendan 	for (i = 0; i < sav->sav_count; i++)
   1236   5450   brendan 		nvlist_free(l2cache[i]);
   1237   5450   brendan 	if (sav->sav_count)
   1238   5450   brendan 		kmem_free(l2cache, sav->sav_count * sizeof (void *));
   1239   2082  eschrock }
   1240   2082  eschrock 
   1241   2082  eschrock static int
   1242   2082  eschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
   1243   2082  eschrock {
   1244   2082  eschrock 	dmu_buf_t *db;
   1245   2082  eschrock 	char *packed = NULL;
   1246   2082  eschrock 	size_t nvsize = 0;
   1247   2082  eschrock 	int error;
   1248   2082  eschrock 	*value = NULL;
   1249   2082  eschrock 
   1250   2082  eschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
   1251   2082  eschrock 	nvsize = *(uint64_t *)db->db_data;
   1252   2082  eschrock 	dmu_buf_rele(db, FTAG);
   1253   2082  eschrock 
   1254   2082  eschrock 	packed = kmem_alloc(nvsize, KM_SLEEP);
   1255   9512      Neil 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
   1256   9512      Neil 	    DMU_READ_PREFETCH);
   1257   2082  eschrock 	if (error == 0)
   1258   2082  eschrock 		error = nvlist_unpack(packed, nvsize, value, 0);
   1259   2082  eschrock 	kmem_free(packed, nvsize);
   1260   2082  eschrock 
   1261   2082  eschrock 	return (error);
   1262    789    ahrens }
   1263    789    ahrens 
   1264    789    ahrens /*
   1265   4451  eschrock  * Checks to see if the given vdev could not be opened, in which case we post a
   1266   4451  eschrock  * sysevent to notify the autoreplace code that the device has been removed.
   1267   4451  eschrock  */
   1268   4451  eschrock static void
   1269   4451  eschrock spa_check_removed(vdev_t *vd)
   1270   4451  eschrock {
   1271   9816    George 	for (int c = 0; c < vd->vdev_children; c++)
   1272   4451  eschrock 		spa_check_removed(vd->vdev_child[c]);
   1273   4451  eschrock 
   1274   4451  eschrock 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
   1275   4451  eschrock 		zfs_post_autoreplace(vd->vdev_spa, vd);
   1276   4451  eschrock 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
   1277   4451  eschrock 	}
   1278   4451  eschrock }
   1279   4451  eschrock 
   1280   4451  eschrock /*
   1281   9701    George  * Load the slog device state from the config object since it's possible
   1282   9701    George  * that the label does not contain the most up-to-date information.
   1283   9701    George  */
   1284   9701    George void
   1285  10594    George spa_load_log_state(spa_t *spa, nvlist_t *nv)
   1286  10594    George {
   1287  10594    George 	vdev_t *ovd, *rvd = spa->spa_root_vdev;
   1288  10594    George 
   1289  10594    George 	/*
   1290  10594    George 	 * Load the original root vdev tree from the passed config.
   1291  10594    George 	 */
   1292  10594    George 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1293  10594    George 	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
   1294  10594    George 
   1295  10594    George 	for (int c = 0; c < rvd->vdev_children; c++) {
   1296  10594    George 		vdev_t *cvd = rvd->vdev_child[c];
   1297  10594    George 		if (cvd->vdev_islog)
   1298  10594    George 			vdev_load_log_state(cvd, ovd->vdev_child[c]);
   1299  10594    George 	}
   1300  10594    George 	vdev_free(ovd);
   1301  10594    George 	spa_config_exit(spa, SCL_ALL, FTAG);
   1302   9701    George }
   1303   9701    George 
   1304   9701    George /*
   1305   7294    perrin  * Check for missing log devices
   1306   7294    perrin  */
   1307   7294    perrin int
   1308   7294    perrin spa_check_logs(spa_t *spa)
   1309   7294    perrin {
   1310   7294    perrin 	switch (spa->spa_log_state) {
   1311   7294    perrin 	case SPA_LOG_MISSING:
   1312   7294    perrin 		/* need to recheck in case slog has been restored */
   1313   7294    perrin 	case SPA_LOG_UNKNOWN:
   1314   7294    perrin 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
   1315   7294    perrin 		    DS_FIND_CHILDREN)) {
   1316   7294    perrin 			spa->spa_log_state = SPA_LOG_MISSING;
   1317   7294    perrin 			return (1);
   1318   7294    perrin 		}
   1319   7294    perrin 		break;
   1320   9701    George 	}
   1321   7294    perrin 	return (0);
   1322   7294    perrin }
   1323   7294    perrin 
   1324  10672      Eric static void
   1325  10672      Eric spa_aux_check_removed(spa_aux_vdev_t *sav)
   1326  10672      Eric {
   1327  10922      Jeff 	for (int i = 0; i < sav->sav_count; i++)
   1328  10672      Eric 		spa_check_removed(sav->sav_vdevs[i]);
   1329  10922      Jeff }
   1330  10922      Jeff 
   1331  10922      Jeff void
   1332  10922      Jeff spa_claim_notify(zio_t *zio)
   1333  10922      Jeff {
   1334  10922      Jeff 	spa_t *spa = zio->io_spa;
   1335  10922      Jeff 
   1336  10922      Jeff 	if (zio->io_error)
   1337  10922      Jeff 		return;
   1338  10922      Jeff 
   1339  10922      Jeff 	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
   1340  10922      Jeff 	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
   1341  10922      Jeff 		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
   1342  10922      Jeff 	mutex_exit(&spa->spa_props_lock);
   1343  10672      Eric }
   1344  10672      Eric 
   1345  10921       Tim typedef struct spa_load_error {
   1346  10921       Tim 	uint64_t	sle_metadata_count;
   1347  10921       Tim 	uint64_t	sle_data_count;
   1348  10921       Tim } spa_load_error_t;
   1349  10921       Tim 
   1350  10921       Tim static void
   1351  10921       Tim spa_load_verify_done(zio_t *zio)
   1352  10921       Tim {
   1353  10921       Tim 	blkptr_t *bp = zio->io_bp;
   1354  10921       Tim 	spa_load_error_t *sle = zio->io_private;
   1355  10921       Tim 	dmu_object_type_t type = BP_GET_TYPE(bp);
   1356  10921       Tim 	int error = zio->io_error;
   1357  10921       Tim 
   1358  10921       Tim 	if (error) {
   1359  10921       Tim 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
   1360  10921       Tim 		    type != DMU_OT_INTENT_LOG)
   1361  10921       Tim 			atomic_add_64(&sle->sle_metadata_count, 1);
   1362  10921       Tim 		else
   1363  10921       Tim 			atomic_add_64(&sle->sle_data_count, 1);
   1364  10921       Tim 	}
   1365  10921       Tim 	zio_data_buf_free(zio->io_data, zio->io_size);
   1366  10921       Tim }
   1367  10921       Tim 
   1368  10921       Tim /*ARGSUSED*/
   1369  10921       Tim static int
   1370  10922      Jeff spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
   1371  10922      Jeff     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
   1372  10921       Tim {
   1373  10921       Tim 	if (bp != NULL) {
   1374  10921       Tim 		zio_t *rio = arg;
   1375  10921       Tim 		size_t size = BP_GET_PSIZE(bp);
   1376  10921       Tim 		void *data = zio_data_buf_alloc(size);
   1377  10921       Tim 
   1378  10921       Tim 		zio_nowait(zio_read(rio, spa, bp, data, size,
   1379  10921       Tim 		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
   1380  10921       Tim 		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
   1381  10921       Tim 		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
   1382  10921       Tim 	}
   1383  10921       Tim 	return (0);
   1384  10921       Tim }
   1385  10921       Tim 
   1386  10921       Tim static int
   1387  10921       Tim spa_load_verify(spa_t *spa)
   1388  10921       Tim {
   1389  10921       Tim 	zio_t *rio;
   1390  10921       Tim 	spa_load_error_t sle = { 0 };
   1391  10921       Tim 	zpool_rewind_policy_t policy;
   1392  10921       Tim 	boolean_t verify_ok = B_FALSE;
   1393  10921       Tim 	int error;
   1394  10921       Tim 
   1395  10921       Tim 	rio = zio_root(spa, NULL, &sle,
   1396  10921       Tim 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
   1397  10921       Tim 
   1398  11125      Jeff 	error = traverse_pool(spa, spa->spa_verify_min_txg,
   1399  11125      Jeff 	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
   1400  10921       Tim 
   1401  10921       Tim 	(void) zio_wait(rio);
   1402  10921       Tim 
   1403  10921       Tim 	zpool_get_rewind_policy(spa->spa_config, &policy);
   1404  10921       Tim 
   1405  10921       Tim 	spa->spa_load_meta_errors = sle.sle_metadata_count;
   1406  10921       Tim 	spa->spa_load_data_errors = sle.sle_data_count;
   1407  10921       Tim 
   1408  10921       Tim 	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
   1409  10921       Tim 	    sle.sle_data_count <= policy.zrp_maxdata) {
   1410  10921       Tim 		verify_ok = B_TRUE;
   1411  10921       Tim 		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
   1412  10921       Tim 		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
   1413  11026       Tim 	} else {
   1414  11026       Tim 		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
   1415  10921       Tim 	}
   1416  10921       Tim 
   1417  10921       Tim 	if (error) {
   1418  10921       Tim 		if (error != ENXIO && error != EIO)
   1419  10921       Tim 			error = EIO;
   1420  10921       Tim 		return (error);
   1421  10921       Tim 	}
   1422  10921       Tim 
   1423  10921       Tim 	return (verify_ok ? 0 : EIO);
   1424  10921       Tim }
   1425  10921       Tim 
   1426   7294    perrin /*
   1427    789    ahrens  * Load an existing storage pool, using the pool's builtin spa_config as a
   1428   1544  eschrock  * source of configuration information.
   1429    789    ahrens  */
   1430    789    ahrens static int
   1431  10921       Tim spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
   1432    789    ahrens {
   1433    789    ahrens 	int error = 0;
   1434  10594    George 	nvlist_t *nvconfig, *nvroot = NULL;
   1435    789    ahrens 	vdev_t *rvd;
   1436    789    ahrens 	uberblock_t *ub = &spa->spa_uberblock;
   1437   1635   bonwick 	uint64_t config_cache_txg = spa->spa_config_txg;
   1438    789    ahrens 	uint64_t pool_guid;
   1439   2082  eschrock 	uint64_t version;
   1440   4451  eschrock 	uint64_t autoreplace = 0;
   1441   8241      Jeff 	int orig_mode = spa->spa_mode;
   1442   7294    perrin 	char *ereport = FM_EREPORT_ZFS_POOL;
   1443  10921       Tim 	nvlist_t *config = spa->spa_config;
   1444   8241      Jeff 
   1445   8241      Jeff 	/*
   1446   8241      Jeff 	 * If this is an untrusted config, access the pool in read-only mode.
   1447   8241      Jeff 	 * This prevents things like resilvering recently removed devices.
   1448   8241      Jeff 	 */
   1449   8241      Jeff 	if (!mosconfig)
   1450   8241      Jeff 		spa->spa_mode = FREAD;
   1451   7754      Jeff 
   1452   7754      Jeff 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
   1453    789    ahrens 
   1454   1544  eschrock 	spa->spa_load_state = state;
   1455   1635   bonwick 
   1456    789    ahrens 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
   1457   1733   bonwick 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
   1458   1544  eschrock 		error = EINVAL;
   1459   1544  eschrock 		goto out;
   1460   1544  eschrock 	}
   1461   2082  eschrock 
   1462   2082  eschrock 	/*
   1463   2082  eschrock 	 * Versioning wasn't explicitly added to the label until later, so if
   1464   2082  eschrock 	 * it's not present treat it as the initial version.
   1465   2082  eschrock 	 */
   1466   2082  eschrock 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
   1467   4577    ahrens 		version = SPA_VERSION_INITIAL;
   1468   1733   bonwick 
   1469   1733   bonwick 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
   1470   1733   bonwick 	    &spa->spa_config_txg);
   1471    789    ahrens 
   1472   1635   bonwick 	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
   1473   1544  eschrock 	    spa_guid_exists(pool_guid, 0)) {
   1474   1544  eschrock 		error = EEXIST;
   1475   1544  eschrock 		goto out;
   1476   1544  eschrock 	}
   1477   2174  eschrock 
   1478   2174  eschrock 	spa->spa_load_guid = pool_guid;
   1479   9234    George 
   1480   9234    George 	/*
   1481   9234    George 	 * Create "The Godfather" zio to hold all async IOs
   1482   9234    George 	 */
   1483   9630      Jeff 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
   1484   9630      Jeff 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
   1485    789    ahrens 
   1486    789    ahrens 	/*
   1487   2082  eschrock 	 * Parse the configuration into a vdev tree.  We explicitly set the
   1488   2082  eschrock 	 * value that will be returned by spa_version() since parsing the
   1489   2082  eschrock 	 * configuration requires knowing the version number.
   1490    789    ahrens 	 */
   1491   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1492   2082  eschrock 	spa->spa_ubsync.ub_version = version;
   1493   2082  eschrock 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
   1494   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   1495    789    ahrens 
   1496   2082  eschrock 	if (error != 0)
   1497   1544  eschrock 		goto out;
   1498    789    ahrens 
   1499   1585   bonwick 	ASSERT(spa->spa_root_vdev == rvd);
   1500    789    ahrens 	ASSERT(spa_guid(spa) == pool_guid);
   1501    789    ahrens 
   1502    789    ahrens 	/*
   1503    789    ahrens 	 * Try to open all vdevs, loading each label in the process.
   1504    789    ahrens 	 */
   1505   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1506   4070  mc142369 	error = vdev_open(rvd);
   1507   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   1508   4070  mc142369 	if (error != 0)
   1509   1544  eschrock 		goto out;
   1510    789    ahrens 
   1511    789    ahrens 	/*
   1512   9276      Mark 	 * We need to validate the vdev labels against the configuration that
   1513   9276      Mark 	 * we have in hand, which is dependent on the setting of mosconfig. If
   1514   9276      Mark 	 * mosconfig is true then we're validating the vdev labels based on
   1515   9276      Mark 	 * that config. Otherwise, we're validating against the cached config
   1516   9276      Mark 	 * (zpool.cache) that was read when we loaded the zfs module, and then
   1517   9276      Mark 	 * later we will recursively call spa_load() and validate against
   1518   9276      Mark 	 * the vdev config.
   1519   9276      Mark 	 */
   1520   9276      Mark 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1521   9276      Mark 	error = vdev_validate(rvd);
   1522   9276      Mark 	spa_config_exit(spa, SCL_ALL, FTAG);
   1523   9276      Mark 	if (error != 0)
   1524   9276      Mark 		goto out;
   1525   1986  eschrock 
   1526   1986  eschrock 	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
   1527   1986  eschrock 		error = ENXIO;
   1528   1986  eschrock 		goto out;
   1529   1986  eschrock 	}
   1530   1986  eschrock 
   1531   1986  eschrock 	/*
   1532    789    ahrens 	 * Find the best uberblock.
   1533    789    ahrens 	 */
   1534   7754      Jeff 	vdev_uberblock_load(NULL, rvd, ub);
   1535    789    ahrens 
   1536    789    ahrens 	/*
   1537    789    ahrens 	 * If we weren't able to find a single valid uberblock, return failure.
   1538    789    ahrens 	 */
   1539    789    ahrens 	if (ub->ub_txg == 0) {
   1540   1760  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1541   1760  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1542   1544  eschrock 		error = ENXIO;
   1543   1544  eschrock 		goto out;
   1544   1544  eschrock 	}
   1545   1544  eschrock 
   1546   1544  eschrock 	/*
   1547   1544  eschrock 	 * If the pool is newer than the code, we can't open it.
   1548   1544  eschrock 	 */
   1549   4577    ahrens 	if (ub->ub_version > SPA_VERSION) {
   1550   1760  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1551   1760  eschrock 		    VDEV_AUX_VERSION_NEWER);
   1552   1544  eschrock 		error = ENOTSUP;
   1553   1544  eschrock 		goto out;
   1554    789    ahrens 	}
   1555    789    ahrens 
   1556    789    ahrens 	/*
   1557    789    ahrens 	 * If the vdev guid sum doesn't match the uberblock, we have an
   1558    789    ahrens 	 * incomplete configuration.
   1559    789    ahrens 	 */
   1560   1732   bonwick 	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
   1561   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1562   1544  eschrock 		    VDEV_AUX_BAD_GUID_SUM);
   1563   1544  eschrock 		error = ENXIO;
   1564   1544  eschrock 		goto out;
   1565    789    ahrens 	}
   1566    789    ahrens 
   1567    789    ahrens 	/*
   1568    789    ahrens 	 * Initialize internal SPA structures.
   1569    789    ahrens 	 */
   1570    789    ahrens 	spa->spa_state = POOL_STATE_ACTIVE;
   1571    789    ahrens 	spa->spa_ubsync = spa->spa_uberblock;
   1572  10921       Tim 	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
   1573  10921       Tim 	    TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE;
   1574  10921       Tim 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
   1575  10921       Tim 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
   1576  10922      Jeff 	spa->spa_claim_max_txg = spa->spa_first_txg;
   1577  10922      Jeff 
   1578   1544  eschrock 	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
   1579   1544  eschrock 	if (error) {
   1580   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1581   1544  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1582  10921       Tim 		error = EIO;
   1583   1544  eschrock 		goto out;
   1584   1544  eschrock 	}
   1585    789    ahrens 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
   1586    789    ahrens 
   1587   1544  eschrock 	if (zap_lookup(spa->spa_meta_objset,
   1588    789    ahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
   1589   1544  eschrock 	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
   1590   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1591   1544  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1592   1544  eschrock 		error = EIO;
   1593   1544  eschrock 		goto out;
   1594   1544  eschrock 	}
   1595    789    ahrens 
   1596  10594    George 	if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) {
   1597  10594    George 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1598  10594    George 		    VDEV_AUX_CORRUPT_DATA);
   1599  10594    George 		error = EIO;
   1600  10594    George 		goto out;
   1601  10594    George 	}
   1602  10594    George 
   1603    789    ahrens 	if (!mosconfig) {
   1604   3975  ek110237 		uint64_t hostid;
   1605    789    ahrens 
   1606  10594    George 		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
   1607   7706       Lin 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
   1608   3975  ek110237 			char *hostname;
   1609   3975  ek110237 			unsigned long myhostid = 0;
   1610   3975  ek110237 
   1611  10594    George 			VERIFY(nvlist_lookup_string(nvconfig,
   1612   3975  ek110237 			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
   1613   3975  ek110237 
   1614   8662    Jordan #ifdef	_KERNEL
   1615   8662    Jordan 			myhostid = zone_get_hostid(NULL);
   1616   8662    Jordan #else	/* _KERNEL */
   1617   8662    Jordan 			/*
   1618   8662    Jordan 			 * We're emulating the system's hostid in userland, so
   1619   8662    Jordan 			 * we can't use zone_get_hostid().
   1620   8662    Jordan 			 */
   1621   3975  ek110237 			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
   1622   8662    Jordan #endif	/* _KERNEL */
   1623   4178     lling 			if (hostid != 0 && myhostid != 0 &&
   1624   8662    Jordan 			    hostid != myhostid) {
   1625   3975  ek110237 				cmn_err(CE_WARN, "pool '%s' could not be "
   1626   3975  ek110237 				    "loaded as it was last accessed by "
   1627   7706       Lin 				    "another system (host: %s hostid: 0x%lx). "
   1628   3975  ek110237 				    "See: http://www.sun.com/msg/ZFS-8000-EY",
   1629   7754      Jeff 				    spa_name(spa), hostname,
   1630   3975  ek110237 				    (unsigned long)hostid);
   1631   3975  ek110237 				error = EBADF;
   1632   3975  ek110237 				goto out;
   1633   3975  ek110237 			}
   1634   1544  eschrock 		}
   1635    789    ahrens 
   1636  10594    George 		spa_config_set(spa, nvconfig);
   1637    789    ahrens 		spa_unload(spa);
   1638    789    ahrens 		spa_deactivate(spa);
   1639   8241      Jeff 		spa_activate(spa, orig_mode);
   1640    789    ahrens 
   1641  10921       Tim 		return (spa_load(spa, state, B_TRUE));
   1642    789    ahrens 	}
   1643    789    ahrens 
   1644   1544  eschrock 	if (zap_lookup(spa->spa_meta_objset,
   1645    789    ahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
   1646  10922      Jeff 	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) {
   1647   2082  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1648   2082  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1649   2082  eschrock 		error = EIO;
   1650   2082  eschrock 		goto out;
   1651   2082  eschrock 	}
   1652   2082  eschrock 
   1653   2082  eschrock 	/*
   1654   2082  eschrock 	 * Load the bit that tells us to use the new accounting function
   1655   2082  eschrock 	 * (raid-z deflation).  If we have an older pool, this will not
   1656   2082  eschrock 	 * be present.
   1657   2082  eschrock 	 */
   1658   2082  eschrock 	error = zap_lookup(spa->spa_meta_objset,
   1659   2082  eschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
   1660   2082  eschrock 	    sizeof (uint64_t), 1, &spa->spa_deflate);
   1661   2082  eschrock 	if (error != 0 && error != ENOENT) {
   1662   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1663   1544  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1664   1544  eschrock 		error = EIO;
   1665   1544  eschrock 		goto out;
   1666   1544  eschrock 	}
   1667    789    ahrens 
   1668    789    ahrens 	/*
   1669   1544  eschrock 	 * Load the persistent error log.  If we have an older pool, this will
   1670   1544  eschrock 	 * not be present.
   1671    789    ahrens 	 */
   1672   1544  eschrock 	error = zap_lookup(spa->spa_meta_objset,
   1673   1544  eschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
   1674   1544  eschrock 	    sizeof (uint64_t), 1, &spa->spa_errlog_last);
   1675   1807   bonwick 	if (error != 0 && error != ENOENT) {
   1676   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1677   1544  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1678   1544  eschrock 		error = EIO;
   1679   1544  eschrock 		goto out;
   1680   1544  eschrock 	}
   1681   1544  eschrock 
   1682   1544  eschrock 	error = zap_lookup(spa->spa_meta_objset,
   1683   1544  eschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
   1684   1544  eschrock 	    sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
   1685   2926  ek110237 	if (error != 0 && error != ENOENT) {
   1686   2926  ek110237 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1687   2926  ek110237 		    VDEV_AUX_CORRUPT_DATA);
   1688   2926  ek110237 		error = EIO;
   1689   2926  ek110237 		goto out;
   1690   2926  ek110237 	}
   1691   2926  ek110237 
   1692   2926  ek110237 	/*
   1693   2926  ek110237 	 * Load the history object.  If we have an older pool, this
   1694   2926  ek110237 	 * will not be present.
   1695   2926  ek110237 	 */
   1696   2926  ek110237 	error = zap_lookup(spa->spa_meta_objset,
   1697   2926  ek110237 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
   1698   2926  ek110237 	    sizeof (uint64_t), 1, &spa->spa_history);
   1699   1544  eschrock 	if (error != 0 && error != ENOENT) {
   1700   1544  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1701   1544  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1702   1544  eschrock 		error = EIO;
   1703   1544  eschrock 		goto out;
   1704   2082  eschrock 	}
   1705   2082  eschrock 
   1706   2082  eschrock 	/*
   1707   2082  eschrock 	 * Load any hot spares for this pool.
   1708   2082  eschrock 	 */
   1709   2082  eschrock 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
   1710   5450   brendan 	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
   1711   2082  eschrock 	if (error != 0 && error != ENOENT) {
   1712   2082  eschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1713   2082  eschrock 		    VDEV_AUX_CORRUPT_DATA);
   1714   2082  eschrock 		error = EIO;
   1715   2082  eschrock 		goto out;
   1716   2082  eschrock 	}
   1717   2082  eschrock 	if (error == 0) {
   1718   4577    ahrens 		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
   1719   5450   brendan 		if (load_nvlist(spa, spa->spa_spares.sav_object,
   1720   5450   brendan 		    &spa->spa_spares.sav_config) != 0) {
   1721   2082  eschrock 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1722   2082  eschrock 			    VDEV_AUX_CORRUPT_DATA);
   1723   2082  eschrock 			error = EIO;
   1724   2082  eschrock 			goto out;
   1725   2082  eschrock 		}
   1726   2082  eschrock 
   1727   7754      Jeff 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1728   7754      Jeff 		spa_load_spares(spa);
   1729   7754      Jeff 		spa_config_exit(spa, SCL_ALL, FTAG);
   1730   5450   brendan 	}
   1731   5450   brendan 
   1732   5450   brendan 	/*
   1733   5450   brendan 	 * Load any level 2 ARC devices for this pool.
   1734   5450   brendan 	 */
   1735   5450   brendan 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
   1736   5450   brendan 	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
   1737   5450   brendan 	    &spa->spa_l2cache.sav_object);
   1738   5450   brendan 	if (error != 0 && error != ENOENT) {
   1739   5450   brendan 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1740   5450   brendan 		    VDEV_AUX_CORRUPT_DATA);
   1741   5450   brendan 		error = EIO;
   1742   5450   brendan 		goto out;
   1743   5450   brendan 	}
   1744   5450   brendan 	if (error == 0) {
   1745   5450   brendan 		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
   1746   5450   brendan 		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
   1747   5450   brendan 		    &spa->spa_l2cache.sav_config) != 0) {
   1748   5450   brendan 			vdev_set_state(rvd, B_TRUE,
   1749   5450   brendan 			    VDEV_STATE_CANT_OPEN,
   1750   5450   brendan 			    VDEV_AUX_CORRUPT_DATA);
   1751   5450   brendan 			error = EIO;
   1752   5450   brendan 			goto out;
   1753   5450   brendan 		}
   1754   5450   brendan 
   1755   7754      Jeff 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1756   7754      Jeff 		spa_load_l2cache(spa);
   1757   7754      Jeff 		spa_config_exit(spa, SCL_ALL, FTAG);
   1758   1544  eschrock 	}
   1759   1544  eschrock 
   1760   5094     lling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
   1761   4543     marks 
   1762   3912     lling 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
   1763   3912     lling 	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);
   1764   3912     lling 
   1765   3912     lling 	if (error && error != ENOENT) {
   1766   3912     lling 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1767   3912     lling 		    VDEV_AUX_CORRUPT_DATA);
   1768   3912     lling 		error = EIO;
   1769   3912     lling 		goto out;
   1770   3912     lling 	}
   1771   3912     lling 
   1772   3912     lling 	if (error == 0) {
   1773   3912     lling 		(void) zap_lookup(spa->spa_meta_objset,
   1774   3912     lling 		    spa->spa_pool_props_object,
   1775   4451  eschrock 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
   1776   3912     lling 		    sizeof (uint64_t), 1, &spa->spa_bootfs);
   1777   4451  eschrock 		(void) zap_lookup(spa->spa_meta_objset,
   1778   4451  eschrock 		    spa->spa_pool_props_object,
   1779   4451  eschrock 		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
   1780   4451  eschrock 		    sizeof (uint64_t), 1, &autoreplace);
   1781  10672      Eric 		spa->spa_autoreplace = (autoreplace != 0);
   1782   4543     marks 		(void) zap_lookup(spa->spa_meta_objset,
   1783   4543     marks 		    spa->spa_pool_props_object,
   1784   4543     marks 		    zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
   1785   4543     marks 		    sizeof (uint64_t), 1, &spa->spa_delegation);
   1786   5329   gw25295 		(void) zap_lookup(spa->spa_meta_objset,
   1787   5329   gw25295 		    spa->spa_pool_props_object,
   1788   5329   gw25295 		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
   1789   5329   gw25295 		    sizeof (uint64_t), 1, &spa->spa_failmode);
   1790   9816    George 		(void) zap_lookup(spa->spa_meta_objset,
   1791   9816    George 		    spa->spa_pool_props_object,
   1792   9816    George 		    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
   1793   9816    George 		    sizeof (uint64_t), 1, &spa->spa_autoexpand);
   1794  10922      Jeff 		(void) zap_lookup(spa->spa_meta_objset,
   1795  10922      Jeff 		    spa->spa_pool_props_object,
   1796  10922      Jeff 		    zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO),
   1797  10922      Jeff 		    sizeof (uint64_t), 1, &spa->spa_dedup_ditto);
   1798   3912     lling 	}
   1799   4451  eschrock 
   1800   4451  eschrock 	/*
   1801   4451  eschrock 	 * If the 'autoreplace' property is set, then post a resource notifying
   1802   4451  eschrock 	 * the ZFS DE that it should not issue any faults for unopenable
   1803   4451  eschrock 	 * devices.  We also iterate over the vdevs, and post a sysevent for any
   1804   4451  eschrock 	 * unopenable vdevs so that the normal autoreplace handler can take
   1805   4451  eschrock 	 * over.
   1806   4451  eschrock 	 */
   1807  10672      Eric 	if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) {
   1808   4451  eschrock 		spa_check_removed(spa->spa_root_vdev);
   1809  10672      Eric 		/*
   1810  10672      Eric 		 * For the import case, this is done in spa_import(), because
   1811  10672      Eric 		 * at this point we're using the spare definitions from
   1812  10672      Eric 		 * the MOS config, not necessarily from the userland config.
   1813  10672      Eric 		 */
   1814  10672      Eric 		if (state != SPA_LOAD_IMPORT) {
   1815  10672      Eric 			spa_aux_check_removed(&spa->spa_spares);
   1816  10672      Eric 			spa_aux_check_removed(&spa->spa_l2cache);
   1817  10672      Eric 		}
   1818  10672      Eric 	}
   1819   3912     lling 
   1820   1544  eschrock 	/*
   1821   1986  eschrock 	 * Load the vdev state for all toplevel vdevs.
   1822   1544  eschrock 	 */
   1823   1986  eschrock 	vdev_load(rvd);
   1824    789    ahrens 
   1825    789    ahrens 	/*
   1826    789    ahrens 	 * Propagate the leaf DTLs we just loaded all the way up the tree.
   1827    789    ahrens 	 */
   1828   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   1829    789    ahrens 	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
   1830   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   1831    789    ahrens 
   1832    789    ahrens 	/*
   1833    789    ahrens 	 * Check the state of the root vdev.  If it can't be opened, it
   1834    789    ahrens 	 * indicates one or more toplevel vdevs are faulted.
   1835    789    ahrens 	 */
   1836   1544  eschrock 	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
   1837   1544  eschrock 		error = ENXIO;
   1838   1544  eschrock 		goto out;
   1839   1544  eschrock 	}
   1840    789    ahrens 
   1841  10922      Jeff 	/*
   1842  10922      Jeff 	 * Load the DDTs (dedup tables).
   1843  10922      Jeff 	 */
   1844  10922      Jeff 	error = ddt_load(spa);
   1845  10922      Jeff 	if (error != 0) {
   1846  10922      Jeff 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1847  10922      Jeff 		    VDEV_AUX_CORRUPT_DATA);
   1848  10922      Jeff 		error = EIO;
   1849  10922      Jeff 		goto out;
   1850  10922      Jeff 	}
   1851  10922      Jeff 
   1852  10956    George 	spa_update_dspace(spa);
   1853  10956    George 
   1854  10921       Tim 	if (state != SPA_LOAD_TRYIMPORT) {
   1855  10921       Tim 		error = spa_load_verify(spa);
   1856  10921       Tim 		if (error) {
   1857  10921       Tim 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1858  10921       Tim 			    VDEV_AUX_CORRUPT_DATA);
   1859  10921       Tim 			goto out;
   1860  10921       Tim 		}
   1861  10921       Tim 	}
   1862  10921       Tim 
   1863  10922      Jeff 	/*
   1864  10922      Jeff 	 * Load the intent log state and check log integrity.
   1865  10922      Jeff 	 */
   1866  10922      Jeff 	VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
   1867  10922      Jeff 	    &nvroot) == 0);
   1868  10922      Jeff 	spa_load_log_state(spa, nvroot);
   1869  10922      Jeff 	nvlist_free(nvconfig);
   1870  10922      Jeff 
   1871  10922      Jeff 	if (spa_check_logs(spa)) {
   1872  10922      Jeff 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
   1873  10922      Jeff 		    VDEV_AUX_BAD_LOG);
   1874  10922      Jeff 		error = ENXIO;
   1875  10922      Jeff 		ereport = FM_EREPORT_ZFS_LOG_REPLAY;
   1876  10922      Jeff 		goto out;
   1877  10922      Jeff 	}
   1878  10922      Jeff 
   1879  10921       Tim 	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
   1880  10921       Tim 	    spa->spa_load_max_txg == UINT64_MAX)) {
   1881   1635   bonwick 		dmu_tx_t *tx;
   1882   1635   bonwick 		int need_update = B_FALSE;
   1883   8241      Jeff 
   1884   8241      Jeff 		ASSERT(state != SPA_LOAD_TRYIMPORT);
   1885   1601   bonwick 
   1886   1635   bonwick 		/*
   1887   1635   bonwick 		 * Claim log blocks that haven't been committed yet.
   1888   1635   bonwick 		 * This must all happen in a single txg.
   1889  10922      Jeff 		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
   1890  10922      Jeff 		 * invoked from zil_claim_log_block()'s i/o done callback.
   1891  10921       Tim 		 * Price of rollback is that we abandon the log.
   1892   1635   bonwick 		 */
   1893  10922      Jeff 		spa->spa_claiming = B_TRUE;
   1894  10922      Jeff 
   1895   1601   bonwick 		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
   1896    789    ahrens 		    spa_first_txg(spa));
   1897   7754      Jeff 		(void) dmu_objset_find(spa_name(spa),
   1898   2417    ahrens 		    zil_claim, tx, DS_FIND_CHILDREN);
   1899    789    ahrens 		dmu_tx_commit(tx);
   1900    789    ahrens 
   1901  10922      Jeff 		spa->spa_claiming = B_FALSE;
   1902  10922      Jeff 
   1903   9701    George 		spa->spa_log_state = SPA_LOG_GOOD;
   1904    789    ahrens 		spa->spa_sync_on = B_TRUE;
   1905    789    ahrens 		txg_sync_start(spa->spa_dsl_pool);
   1906    789    ahrens 
   1907    789    ahrens 		/*
   1908  10922      Jeff 		 * Wait for all claims to sync.  We sync up to the highest
   1909  10922      Jeff 		 * claimed log block birth time so that claimed log blocks
   1910  10922      Jeff 		 * don't appear to be from the future.  spa_claim_max_txg
   1911  10922      Jeff 		 * will have been set for us by either zil_check_log_chain()
   1912  10922      Jeff 		 * (invoked from spa_check_logs()) or zil_claim() above.
   1913  10922      Jeff 		 */
   1914  10922      Jeff 		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
   1915   1585   bonwick 
   1916   1585   bonwick 		/*
   1917   1635   bonwick 		 * If the config cache is stale, or we have uninitialized
   1918   1635   bonwick 		 * metaslabs (see spa_vdev_add()), then update the config.
   1919  10100       Lin 		 *
   1920  10100       Lin 		 * If spa_load_verbatim is true, trust the current
   1921  10100       Lin 		 * in-core spa_config and update the disk labels.
   1922   1585   bonwick 		 */
   1923   1635   bonwick 		if (config_cache_txg != spa->spa_config_txg ||
   1924  10921       Tim 		    state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
   1925  10921       Tim 		    state == SPA_LOAD_RECOVER)
   1926   1635   bonwick 			need_update = B_TRUE;
   1927   1635   bonwick 
   1928   8241      Jeff 		for (int c = 0; c < rvd->vdev_children; c++)
   1929   1635   bonwick 			if (rvd->vdev_child[c]->vdev_ms_array == 0)
   1930   1635   bonwick 				need_update = B_TRUE;
   1931   1585   bonwick 
   1932   1585   bonwick 		/*
   1933   1635   bonwick 		 * Update the config cache asynchronously in case we're the
   1934   1635   bonwick 		 * root pool, in which case the config cache isn't writable yet.
   1935   1585   bonwick 		 */
   1936   1635   bonwick 		if (need_update)
   1937   1635   bonwick 			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
   1938   8241      Jeff 
   1939   8241      Jeff 		/*
   1940   8241      Jeff 		 * Check all DTLs to see if anything needs resilvering.
   1941   8241      Jeff 		 */
   1942   8241      Jeff 		if (vdev_resilver_needed(rvd, NULL, NULL))
   1943   8241      Jeff 			spa_async_request(spa, SPA_ASYNC_RESILVER);
   1944  10298   Matthew 
   1945  10298   Matthew 		/*
   1946  10298   Matthew 		 * Delete any inconsistent datasets.
   1947  10298   Matthew 		 */
   1948  10298   Matthew 		(void) dmu_objset_find(spa_name(spa),
   1949  10298   Matthew 		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
   1950  10342     chris 
   1951  10342     chris 		/*
   1952  10342     chris 		 * Clean up any stale temporary dataset userrefs.
   1953  10342     chris 		 */
   1954  10342     chris 		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
   1955    789    ahrens 	}
   1956    789    ahrens 
   1957   1544  eschrock 	error = 0;
   1958   1544  eschrock out:
   1959  10921       Tim 
   1960   7046    ahrens 	spa->spa_minref = refcount_count(&spa->spa_refcount);
   1961   2082  eschrock 	if (error && error != EBADF)
   1962   7294    perrin 		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
   1963  11149    George 
   1964  11149    George 	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
   1965   1544  eschrock 	spa->spa_ena = 0;
   1966   1544  eschrock 
   1967   1544  eschrock 	return (error);
   1968    789    ahrens }
   1969    789    ahrens 
   1970  10921       Tim static int
   1971  10921       Tim spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
   1972  10921       Tim {
   1973  10921       Tim 	spa_unload(spa);
   1974  10921       Tim 	spa_deactivate(spa);
   1975  10921       Tim 
   1976  10921       Tim 	spa->spa_load_max_txg--;
   1977  10921       Tim 
   1978  10921       Tim 	spa_activate(spa, spa_mode_global);
   1979  10921       Tim 	spa_async_suspend(spa);
   1980  10921       Tim 
   1981  10921       Tim 	return (spa_load(spa, state, mosconfig));
   1982  10921       Tim }
   1983  10921       Tim 
   1984  10921       Tim static int
   1985  10921       Tim spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
   1986  10921       Tim     uint64_t max_request, boolean_t extreme)
   1987  10921       Tim {
   1988  10921       Tim 	nvlist_t *config = NULL;
   1989  10921       Tim 	int load_error, rewind_error;
   1990  10921       Tim 	uint64_t safe_rollback_txg;
   1991  10921       Tim 	uint64_t min_txg;
   1992  10921       Tim 
   1993  11026       Tim 	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
   1994  10921       Tim 		spa->spa_load_max_txg = spa->spa_load_txg;
   1995  11026       Tim 		spa->spa_log_state = SPA_LOG_CLEAR;
   1996  11026       Tim 	} else {
   1997  10921       Tim 		spa->spa_load_max_txg = max_request;
   1998  11026       Tim 	}
   1999  10921       Tim 
   2000  10921       Tim 	load_error = rewind_error = spa_load(spa, state, mosconfig);
   2001  10921       Tim 	if (load_error == 0)
   2002  10921       Tim 		return (0);
   2003  10921       Tim 
   2004  10921       Tim 	if (spa->spa_root_vdev != NULL)
   2005  10921       Tim 		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
   2006  10921       Tim 
   2007  10921       Tim 	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
   2008  10921       Tim 	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;
   2009  10921       Tim 
   2010  10921       Tim 	/* specific txg requested */
   2011  10921       Tim 	if (spa->spa_load_max_txg != UINT64_MAX && !extreme) {
   2012  10921       Tim 		nvlist_free(config);
   2013  10921       Tim 		return (load_error);
   2014  10921       Tim 	}
   2015  10921       Tim 
   2016  10921       Tim 	/* Price of rolling back is discarding txgs, including log */
   2017  10921       Tim 	if (state == SPA_LOAD_RECOVER)
   2018  10921       Tim 		spa->spa_log_state = SPA_LOG_CLEAR;
   2019  10921       Tim 
   2020  10921       Tim 	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
   2021  10921       Tim 	safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;
   2022  10921       Tim 
   2023  10921       Tim 	min_txg = extreme ? TXG_INITIAL : safe_rollback_txg;
   2024  10921       Tim 	while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) {
   2025  10921       Tim 		if (spa->spa_load_max_txg < safe_rollback_txg)
   2026  10921       Tim 			spa->spa_extreme_rewind = B_TRUE;
   2027  10921       Tim 		rewind_error = spa_load_retry(spa, state, mosconfig);
   2028  10921       Tim 	}
   2029  10921       Tim 
   2030  10921       Tim 	if (config)
   2031  10921       Tim 		spa_rewind_data_to_nvlist(spa, config);
   2032  10921       Tim 
   2033  10921       Tim 	spa->spa_extreme_rewind = B_FALSE;
   2034  10921       Tim 	spa->spa_load_max_txg = UINT64_MAX;
   2035  10921       Tim 
   2036  10921       Tim 	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
   2037  10921       Tim 		spa_config_set(spa, config);
   2038  10921       Tim 
   2039  10921       Tim 	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
   2040  10921       Tim }
   2041  10921       Tim 
   2042    789    ahrens /*
   2043    789    ahrens  * Pool Open/Import
   2044    789    ahrens  *
   2045    789    ahrens  * The import case is identical to an open except that the configuration is sent
   2046    789    ahrens  * down from userland, instead of grabbed from the configuration cache.  For the
   2047    789    ahrens  * case of an open, the pool configuration will exist in the
   2048   4451  eschrock  * POOL_STATE_UNINITIALIZED state.
   2049    789    ahrens  *
   2050    789    ahrens  * The stats information (gen/count/ustats) is used to gather vdev statistics at
   2051    789    ahrens  * the same time open the pool, without having to keep around the spa_t in some
   2052    789    ahrens  * ambiguous state.
   2053    789    ahrens  */
   2054    789    ahrens static int
   2055  10921       Tim spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
   2056  10921       Tim     nvlist_t **config)
   2057  10921       Tim {
   2058  10921       Tim 	spa_t *spa;
   2059  10921       Tim 	boolean_t norewind;
   2060  10921       Tim 	boolean_t extreme;
   2061  10921       Tim 	zpool_rewind_policy_t policy;
   2062  10921       Tim 	spa_load_state_t state = SPA_LOAD_OPEN;
   2063    789    ahrens 	int error;
   2064    789    ahrens 	int locked = B_FALSE;
   2065    789    ahrens 
   2066    789    ahrens 	*spapp = NULL;
   2067  10921       Tim 
   2068  10921       Tim 	zpool_get_rewind_policy(nvpolicy, &policy);
   2069  10921       Tim 	if (policy.zrp_request & ZPOOL_DO_REWIND)
   2070  10921       Tim 		state = SPA_LOAD_RECOVER;
   2071  10921       Tim 	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
   2072  10921       Tim 	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);
   2073    789    ahrens 
   2074    789    ahrens 	/*
   2075    789    ahrens 	 * As disgusting as this is, we need to support recursive calls to this
   2076    789    ahrens 	 * function because dsl_dir_open() is called during spa_load(), and ends
   2077    789    ahrens 	 * up calling spa_open() again.  The real fix is to figure out how to
   2078    789    ahrens 	 * avoid dsl_dir_open() calling this in the first place.
   2079    789    ahrens 	 */
   2080    789    ahrens 	if (mutex_owner(&spa_namespace_lock) != curthread) {
   2081    789    ahrens 		mutex_enter(&spa_namespace_lock);
   2082    789    ahrens 		locked = B_TRUE;
   2083    789    ahrens 	}
   2084    789    ahrens 
   2085    789    ahrens 	if ((spa = spa_lookup(pool)) == NULL) {
   2086    789    ahrens 		if (locked)
   2087    789    ahrens 			mutex_exit(&spa_namespace_lock);
   2088    789    ahrens 		return (ENOENT);
   2089    789    ahrens 	}
   2090  10921       Tim 
   2091    789    ahrens 	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
   2092    789    ahrens 
   2093   8241      Jeff 		spa_activate(spa, spa_mode_global);
   2094    789    ahrens 
   2095  10921       Tim 		if (spa->spa_last_open_failed && norewind) {
   2096  10921       Tim 			if (config != NULL && spa->spa_config)
   2097  10921       Tim 				VERIFY(nvlist_dup(spa->spa_config,
   2098  10921       Tim 				    config, KM_SLEEP) == 0);
   2099  10921       Tim 			spa_deactivate(spa);
   2100  10921       Tim 			if (locked)
   2101  10921       Tim 				mutex_exit(&spa_namespace_lock);
   2102  10921       Tim 			return (spa->spa_last_open_failed);
   2103  10921       Tim 		}
   2104  10921       Tim 
   2105  10921       Tim 		if (state != SPA_LOAD_RECOVER)
   2106  10921       Tim 			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
   2107  10921       Tim 
   2108  10921       Tim 		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
   2109  10921       Tim 		    extreme);
   2110    789    ahrens 
   2111    789    ahrens 		if (error == EBADF) {
   2112    789    ahrens 			/*
   2113   1986  eschrock 			 * If vdev_validate() returns failure (indicated by
   2114   1986  eschrock 			 * EBADF), it indicates that one of the vdevs indicates
   2115   1986  eschrock 			 * that the pool has been exported or destroyed.  If
   2116   1986  eschrock 			 * this is the case, the config cache is out of sync and
   2117   1986  eschrock 			 * we should remove the pool from the namespace.
   2118    789    ahrens 			 */
   2119   6643  eschrock 			spa_unload(spa);
   2120   6643  eschrock 			spa_deactivate(spa);
   2121   6643  eschrock 			spa_config_sync(spa, B_TRUE, B_TRUE);
   2122    789    ahrens 			spa_remove(spa);
   2123    789    ahrens 			if (locked)
   2124    789    ahrens 				mutex_exit(&spa_namespace_lock);
   2125    789    ahrens 			return (ENOENT);
   2126   1544  eschrock 		}
   2127   1544  eschrock 
   2128   1544  eschrock 		if (error) {
   2129    789    ahrens 			/*
   2130    789    ahrens 			 * We can't open the pool, but we still have useful
   2131    789    ahrens 			 * information: the state of each vdev after the
   2132    789    ahrens 			 * attempted vdev_open().  Return this to the user.
   2133    789    ahrens 			 */
   2134  10921       Tim 			if (config != NULL && spa->spa_config)
   2135  10921       Tim 				VERIFY(nvlist_dup(spa->spa_config, config,
   2136  10921       Tim 				    KM_SLEEP) == 0);
   2137    789    ahrens 			spa_unload(spa);
   2138    789    ahrens 			spa_deactivate(spa);
   2139  10921       Tim 			spa->spa_last_open_failed = error;
   2140    789    ahrens 			if (locked)
   2141    789    ahrens 				mutex_exit(&spa_namespace_lock);
   2142    789    ahrens 			*spapp = NULL;
   2143    789    ahrens 			return (error);
   2144  10921       Tim 		}
   2145  10921       Tim 
   2146    789    ahrens 	}
   2147    789    ahrens 
   2148    789    ahrens 	spa_open_ref(spa, tag);
   2149   4451  eschrock 
   2150    789    ahrens 
   2151   7754      Jeff 	if (config != NULL)
   2152    789    ahrens 		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
   2153    789    ahrens 
   2154  11026       Tim 	if (locked) {
   2155  11026       Tim 		spa->spa_last_open_failed = 0;
   2156  11026       Tim 		spa->spa_last_ubsync_txg = 0;
   2157  11026       Tim 		spa->spa_load_txg = 0;
   2158  11026       Tim 		mutex_exit(&spa_namespace_lock);
   2159  11026       Tim 	}
   2160  10921       Tim 
   2161  10921       Tim 	*spapp = spa;
   2162  10921       Tim 
   2163  10921       Tim 	return (0);
   2164  10921       Tim }
   2165  10921       Tim 
   2166  10921       Tim int
   2167  10921       Tim spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
   2168  10921       Tim     nvlist_t **config)
   2169  10921       Tim {
   2170  10921       Tim 	return (spa_open_common(name, spapp, tag, policy, config));
   2171    789    ahrens }
   2172    789    ahrens 
   2173    789    ahrens int
   2174    789    ahrens spa_open(const char *name, spa_t **spapp, void *tag)
   2175    789    ahrens {
   2176  10921       Tim 	return (spa_open_common(name, spapp, tag, NULL, NULL));
   2177    789    ahrens }
   2178    789    ahrens 
   2179   1544  eschrock /*
   2180   1544  eschrock  * Lookup the given spa_t, incrementing the inject count in the process,
   2181   1544  eschrock  * preventing it from being exported or destroyed.
   2182   1544  eschrock  */
   2183   1544  eschrock spa_t *
   2184   1544  eschrock spa_inject_addref(char *name)
   2185   1544  eschrock {
   2186   1544  eschrock 	spa_t *spa;
   2187   1544  eschrock 
   2188   1544  eschrock 	mutex_enter(&spa_namespace_lock);
   2189   1544  eschrock 	if ((spa = spa_lookup(name)) == NULL) {
   2190   1544  eschrock 		mutex_exit(&spa_namespace_lock);
   2191   1544  eschrock 		return (NULL);
   2192   1544  eschrock 	}
   2193   1544  eschrock 	spa->spa_inject_ref++;
   2194   1544  eschrock 	mutex_exit(&spa_namespace_lock);
   2195   1544  eschrock 
   2196   1544  eschrock 	return (spa);
   2197   1544  eschrock }
   2198   1544  eschrock 
   2199   1544  eschrock void
   2200   1544  eschrock spa_inject_delref(spa_t *spa)
   2201   1544  eschrock {
   2202   1544  eschrock 	mutex_enter(&spa_namespace_lock);
   2203   1544  eschrock 	spa->spa_inject_ref--;
   2204   1544  eschrock 	mutex_exit(&spa_namespace_lock);
   2205   1544  eschrock }
   2206   1544  eschrock 
   2207   5450   brendan /*
   2208   5450   brendan  * Add spares device information to the nvlist.
   2209   5450   brendan  */
   2210   2082  eschrock static void
   2211   2082  eschrock spa_add_spares(spa_t *spa, nvlist_t *config)
   2212   2082  eschrock {
   2213   2082  eschrock 	nvlist_t **spares;
   2214   2082  eschrock 	uint_t i, nspares;
   2215   2082  eschrock 	nvlist_t *nvroot;
   2216   2082  eschrock 	uint64_t guid;
   2217   2082  eschrock 	vdev_stat_t *vs;
   2218   2082  eschrock 	uint_t vsc;
   2219   3377  eschrock 	uint64_t pool;
   2220   2082  eschrock 
   2221   9425      Eric 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
   2222   9425      Eric 
   2223   5450   brendan 	if (spa->spa_spares.sav_count == 0)
   2224   2082  eschrock 		return;
   2225   2082  eschrock 
   2226   2082  eschrock 	VERIFY(nvlist_lookup_nvlist(config,
   2227   2082  eschrock 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
   2228   5450   brendan 	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
   2229   2082  eschrock 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
   2230   2082  eschrock 	if (nspares != 0) {
   2231   2082  eschrock 		VERIFY(nvlist_add_nvlist_array(nvroot,
   2232   2082  eschrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
   2233   2082  eschrock 		VERIFY(nvlist_lookup_nvlist_array(nvroot,
   2234   2082  eschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
   2235   2082  eschrock 
   2236   2082  eschrock 		/*
   2237   2082  eschrock 		 * Go through and find any spares which have since been
   2238   2082  eschrock 		 * repurposed as an active spare.  If this is the case, update
   2239   2082  eschrock 		 * their status appropriately.
   2240   2082  eschrock 		 */
   2241   2082  eschrock 		for (i = 0; i < nspares; i++) {
   2242   2082  eschrock 			VERIFY(nvlist_lookup_uint64(spares[i],
   2243   2082  eschrock 			    ZPOOL_CONFIG_GUID, &guid) == 0);
   2244   7214     lling 			if (spa_spare_exists(guid, &pool, NULL) &&
   2245   7214     lling 			    pool != 0ULL) {
   2246   2082  eschrock 				VERIFY(nvlist_lookup_uint64_array(
   2247   2082  eschrock 				    spares[i], ZPOOL_CONFIG_STATS,
   2248   2082  eschrock 				    (uint64_t **)&vs, &vsc) == 0);
   2249   2082  eschrock 				vs->vs_state = VDEV_STATE_CANT_OPEN;
   2250   2082  eschrock 				vs->vs_aux = VDEV_AUX_SPARED;
   2251   2082  eschrock 			}
   2252   2082  eschrock 		}
   2253   2082  eschrock 	}
   2254   2082  eschrock }
   2255   2082  eschrock 
   2256   5450   brendan /*
   2257   5450   brendan  * Add l2cache device information to the nvlist, including vdev stats.
   2258   5450   brendan  */
   2259   5450   brendan static void
   2260   5450   brendan spa_add_l2cache(spa_t *spa, nvlist_t *config)
   2261   5450   brendan {
   2262   5450   brendan 	nvlist_t **l2cache;
   2263   5450   brendan 	uint_t i, j, nl2cache;
   2264   5450   brendan 	nvlist_t *nvroot;
   2265   5450   brendan 	uint64_t guid;
   2266   5450   brendan 	vdev_t *vd;
   2267   5450   brendan 	vdev_stat_t *vs;
   2268   5450   brendan 	uint_t vsc;
   2269   5450   brendan 
   2270   9425      Eric 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
   2271   9425      Eric 
   2272   5450   brendan 	if (spa->spa_l2cache.sav_count == 0)
   2273   5450   brendan 		return;
   2274   5450   brendan 
   2275   5450   brendan 	VERIFY(nvlist_lookup_nvlist(config,
   2276   5450   brendan 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
   2277   5450   brendan 	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
   2278   5450   brendan 	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
   2279   5450   brendan 	if (nl2cache != 0) {
   2280   5450   brendan 		VERIFY(nvlist_add_nvlist_array(nvroot,
   2281   5450   brendan 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
   2282   5450   brendan 		VERIFY(nvlist_lookup_nvlist_array(nvroot,
   2283   5450   brendan 		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
   2284   5450   brendan 
   2285   5450   brendan 		/*
   2286   5450   brendan 		 * Update level 2 cache device stats.
   2287   5450   brendan 		 */
   2288   5450   brendan 
   2289   5450   brendan 		for (i = 0; i < nl2cache; i++) {
   2290   5450   brendan 			VERIFY(nvlist_lookup_uint64(l2cache[i],
   2291   5450   brendan 			    ZPOOL_CONFIG_GUID, &guid) == 0);
   2292   5450   brendan 
   2293   5450   brendan 			vd = NULL;
   2294   5450   brendan 			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
   2295   5450   brendan 				if (guid ==
   2296   5450   brendan 				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
   2297   5450   brendan 					vd = spa->spa_l2cache.sav_vdevs[j];
   2298   5450   brendan 					break;
   2299   5450   brendan 				}
   2300   5450   brendan 			}
   2301   5450   brendan 			ASSERT(vd != NULL);
   2302   5450   brendan 
   2303   5450   brendan 			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
   2304   5450   brendan 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
   2305   5450   brendan 			vdev_get_stats(vd, vs);
   2306   5450   brendan 		}
   2307   5450   brendan 	}
   2308   5450   brendan }
   2309   5450   brendan 
   2310    789    ahrens int
   2311   1544  eschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
   2312    789    ahrens {
   2313    789    ahrens 	int error;
   2314    789    ahrens 	spa_t *spa;
   2315    789    ahrens 
   2316    789    ahrens 	*config = NULL;
   2317  10921       Tim 	error = spa_open_common(name, &spa, FTAG, NULL, config);
   2318   1544  eschrock 
   2319   9425      Eric 	if (spa != NULL) {
   2320   9425      Eric 		/*
   2321   9425      Eric 		 * This still leaves a window of inconsistency where the spares
   2322   9425      Eric 		 * or l2cache devices could change and the config would be
   2323   9425      Eric 		 * self-inconsistent.
   2324   9425      Eric 		 */
   2325   9425      Eric 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
   2326   9425      Eric 
   2327   9425      Eric 		if (*config != NULL) {
   2328   7754      Jeff 			VERIFY(nvlist_add_uint64(*config,
   2329   9425      Eric 			    ZPOOL_CONFIG_ERRCOUNT,
   2330   9425      Eric 			    spa_get_errlog_size(spa)) == 0);
   2331   9425      Eric 
   2332   9425      Eric 			if (spa_suspended(spa))
   2333   9425      Eric 				VERIFY(nvlist_add_uint64(*config,
   2334   9425      Eric 				    ZPOOL_CONFIG_SUSPENDED,
   2335   9425      Eric 				    spa->spa_failmode) == 0);
   2336   9425      Eric 
   2337   9425      Eric 			spa_add_spares(spa, *config);
   2338   9425      Eric 			spa_add_l2cache(spa, *config);
   2339   9425      Eric 		}
   2340   2082  eschrock 	}
   2341   1544  eschrock 
   2342   1544  eschrock 	/*
   2343   1544  eschrock 	 * We want to get the alternate root even for faulted pools, so we cheat
   2344   1544  eschrock 	 * and call spa_lookup() directly.
   2345   1544  eschrock 	 */
   2346   1544  eschrock 	if (altroot) {
   2347   1544  eschrock 		if (spa == NULL) {
   2348   1544  eschrock 			mutex_enter(&spa_namespace_lock);
   2349   1544  eschrock 			spa = spa_lookup(name);
   2350   1544  eschrock 			if (spa)
   2351   1544  eschrock 				spa_altroot(spa, altroot, buflen);
   2352   1544  eschrock 			else
   2353   1544  eschrock 				altroot[0] = '\0';
   2354   1544  eschrock 			spa = NULL;
   2355   1544  eschrock 			mutex_exit(&spa_namespace_lock);
   2356   1544  eschrock 		} else {
   2357   1544  eschrock 			spa_altroot(spa, altroot, buflen);
   2358   1544  eschrock 		}
   2359   1544  eschrock 	}
   2360    789    ahrens 
   2361   9425      Eric 	if (spa != NULL) {
   2362   9425      Eric 		spa_config_exit(spa, SCL_CONFIG, FTAG);
   2363    789    ahrens 		spa_close(spa, FTAG);
   2364   9425      Eric 	}
   2365    789    ahrens 
   2366    789    ahrens 	return (error);
   2367    789    ahrens }
   2368    789    ahrens 
   2369    789    ahrens /*
   2370   5450   brendan  * Validate that the auxiliary device array is well formed.  We must have an
   2371   5450   brendan  * array of nvlists, each which describes a valid leaf vdev.  If this is an
   2372   5450   brendan  * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
   2373   5450   brendan  * specified, as long as they are well-formed.
   2374   5450   brendan  */
   2375   5450   brendan static int
   2376   5450   brendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
   2377   5450   brendan     spa_aux_vdev_t *sav, const char *config, uint64_t version,
   2378   5450   brendan     vdev_labeltype_t label)
   2379   5450   brendan {
   2380   5450   brendan 	nvlist_t **dev;
   2381   5450   brendan 	uint_t i, ndev;
   2382   5450   brendan 	vdev_t *vd;
   2383   5450   brendan 	int error;
   2384   5450   brendan 
   2385   7754      Jeff 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
   2386   7754      Jeff 
   2387   5450   brendan 	/*
   2388   5450   brendan 	 * It's acceptable to have no devs specified.
   2389   5450   brendan 	 */
   2390   5450   brendan 	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
   2391   5450   brendan 		return (0);
   2392   5450   brendan 
   2393   5450   brendan 	if (ndev == 0)
   2394   2082  eschrock 		return (EINVAL);
   2395   2082  eschrock 
   2396   2082  eschrock 	/*
   2397   5450   brendan 	 * Make sure the pool is formatted with a version that supports this
   2398   5450   brendan 	 * device type.
   2399   5450   brendan 	 */
   2400   5450   brendan 	if (spa_version(spa) < version)
   2401   2082  eschrock 		return (ENOTSUP);
   2402   2082  eschrock 
   2403   3377  eschrock 	/*
   2404   5450   brendan 	 * Set the pending device list so we correctly handle device in-use
   2405   3377  eschrock 	 * checking.
   2406   3377  eschrock 	 */
   2407   5450   brendan 	sav->sav_pending = dev;
   2408   5450   brendan 	sav->sav_npending = ndev;
   2409   5450   brendan 
   2410   5450   brendan 	for (i = 0; i < ndev; i++) {
   2411   5450   brendan 		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
   2412   2082  eschrock 		    mode)) != 0)
   2413   3377  eschrock 			goto out;
   2414   2082  eschrock 
   2415   2082  eschrock 		if (!vd->vdev_ops->vdev_op_leaf) {
   2416   2082  eschrock 			vdev_free(vd);
   2417   3377  eschrock 			error = EINVAL;
   2418   3377  eschrock 			goto out;
   2419   2082  eschrock 		}
   2420   2082  eschrock 
   2421   5450   brendan 		/*
   2422   7754      Jeff 		 * The L2ARC currently only supports disk devices in
   2423   7754      Jeff 		 * kernel context.  For user-level testing, we allow it.
   2424   7754      Jeff 		 */
   2425   7754      Jeff #ifdef _KERNEL
   2426   5450   brendan 		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
   2427   5450   brendan 		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
   2428   5450   brendan 			error = ENOTBLK;
   2429   5450   brendan 			goto out;
   2430   5450   brendan 		}
   2431   7754      Jeff #endif
   2432   2082  eschrock 		vd->vdev_top = vd;
   2433   3377  eschrock 
   2434   3377  eschrock 		if ((error = vdev_open(vd)) == 0 &&
   2435   5450   brendan 		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
   2436   5450   brendan 			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
   2437   3377  eschrock 			    vd->vdev_guid) == 0);
   2438   2082  eschrock 		}
   2439   2082  eschrock 
   2440   3377  eschrock 		vdev_free(vd);
   2441   2082  eschrock 
   2442   5450   brendan 		if (error &&
   2443   5450   brendan 		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
   2444   3377  eschrock 			goto out;
   2445   3377  eschrock 		else
   2446   3377  eschrock 			error = 0;
   2447   2082  eschrock 	}
   2448   2082  eschrock 
   2449   3377  eschrock out:
   2450   5450   brendan 	sav->sav_pending = NULL;
   2451   5450   brendan 	sav->sav_npending = 0;
   2452   5450   brendan 	return (error);
   2453   5450   brendan }
   2454   5450   brendan 
   2455   5450   brendan static int
   2456   5450   brendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
   2457   5450   brendan {
   2458   5450   brendan 	int error;
   2459   7754      Jeff 
   2460   7754      Jeff 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
   2461   5450   brendan 
   2462   5450   brendan 	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
   2463   5450   brendan 	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
   2464   5450   brendan 	    VDEV_LABEL_SPARE)) != 0) {
   2465   5450   brendan 		return (error);
   2466   5450   brendan 	}
   2467   5450   brendan 
   2468   5450   brendan 	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
   2469   5450   brendan 	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
   2470   5450   brendan 	    VDEV_LABEL_L2CACHE));
   2471   5450   brendan }
   2472   5450   brendan 
   2473   5450   brendan static void
   2474   5450   brendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
   2475   5450   brendan     const char *config)
   2476   5450   brendan {
   2477   5450   brendan 	int i;
   2478   5450   brendan 
   2479   5450   brendan 	if (sav->sav_config != NULL) {
   2480   5450   brendan 		nvlist_t **olddevs;
   2481   5450   brendan 		uint_t oldndevs;
   2482   5450   brendan 		nvlist_t **newdevs;
   2483   5450   brendan 
   2484   5450   brendan 		/*
   2485   5450   brendan 		 * Generate new dev list by concatentating with the
   2486   5450   brendan 		 * current dev list.
   2487   5450   brendan 		 */
   2488   5450   brendan 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
   2489   5450   brendan 		    &olddevs, &oldndevs) == 0);
   2490   5450   brendan 
   2491   5450   brendan 		newdevs = kmem_alloc(sizeof (void *) *
   2492   5450   brendan 		    (ndevs + oldndevs), KM_SLEEP);
   2493   5450   brendan 		for (i = 0; i < oldndevs; i++)
   2494   5450   brendan 			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
   2495   5450   brendan 			    KM_SLEEP) == 0);
   2496   5450   brendan 		for (i = 0; i < ndevs; i++)
   2497   5450   brendan 			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
   2498   5450   brendan 			    KM_SLEEP) == 0);
   2499   5450   brendan 
   2500   5450   brendan 		VERIFY(nvlist_remove(sav->sav_config, config,
   2501   5450   brendan 		    DATA_TYPE_NVLIST_ARRAY) == 0);
   2502   5450   brendan 
   2503   5450   brendan 		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
   2504   5450   brendan 		    config, newdevs, ndevs + oldndevs) == 0);
   2505   5450   brendan 		for (i = 0; i < oldndevs + ndevs; i++)
   2506   5450   brendan 			nvlist_free(newdevs[i]);
   2507   5450   brendan 		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
   2508   5450   brendan 	} else {
   2509   5450   brendan 		/*
   2510   5450   brendan 		 * Generate a new dev list.
   2511   5450   brendan 		 */
   2512   5450   brendan 		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
   2513   5450   brendan 		    KM_SLEEP) == 0);
   2514   5450   brendan 		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
   2515   5450   brendan 		    devs, ndevs) == 0);
   2516   5450   brendan 	}
   2517   5450   brendan }
   2518   5450   brendan 
   2519   5450   brendan /*
   2520   5450   brendan  * Stop and drop level 2 ARC devices
                          *
                          * Walks every vdev listed in spa->spa_l2cache.  For each one:
                          *  - if spa_l2cache_exists() reports a non-zero pool for the vdev's
                          *    guid and l2arc_vdev_present() is true, the vdev is handed to
                          *    l2arc_remove_vdev();
                          *  - if the vdev is flagged vdev_isl2cache, spa_l2cache_remove() is
                          *    called on it;
                          *  - its stats are cleared and the vdev is closed.
                          * The sav_vdevs array itself is not modified here.
   2521   5450   brendan  */
   2522   5450   brendan void
   2523   5450   brendan spa_l2cache_drop(spa_t *spa)
   2524   5450   brendan {
   2525   5450   brendan 	vdev_t *vd;
   2526   5450   brendan 	int i;
   2527   5450   brendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
   2528   5450   brendan 
   2529   5450   brendan 	for (i = 0; i < sav->sav_count; i++) {
   2530   5450   brendan 		uint64_t pool;
   2531   5450   brendan 
   2532   5450   brendan 		vd = sav->sav_vdevs[i];
   2533   5450   brendan 		ASSERT(vd != NULL);
   2534   5450   brendan 
                         		/*
                         		 * Only pull the device out of the L2ARC when it is
                         		 * registered with a non-zero pool value and the L2ARC
                         		 * still knows about it.
                         		 */
   2535   8241      Jeff 		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
   2536   8241      Jeff 		    pool != 0ULL && l2arc_vdev_present(vd))
   2537   5450   brendan 			l2arc_remove_vdev(vd);
   2538   5450   brendan 		if (vd->vdev_isl2cache)
   2539   5450   brendan 			spa_l2cache_remove(vd);
   2540   5450   brendan 		vdev_clear_stats(vd);
                         		/* The return value of vdev_close() is deliberately ignored. */
   2541   5450   brendan 		(void) vdev_close(vd);
   2542   5450   brendan 	}
   2543   2082  eschrock }
   2544   2082  eschrock 
   2545   2082  eschrock /*
   2546    789    ahrens  * Pool Creation
                          *
                          * Create a new pool named 'pool' from the vdev description in 'nvroot'.
                          *
                          *	pool		name of the new pool
                          *	nvroot		vdev tree to parse; may also carry
                          *			ZPOOL_CONFIG_SPARES and ZPOOL_CONFIG_L2CACHE arrays
                          *	props		pool properties; the code tests it against NULL
                          *			before validating and before syncing it
                          *	history_str	command string logged to the pool history when
                          *			non-NULL and the version supports history
                          *	zplprops	passed through unchanged to dsl_pool_create()
                          *
                          * Returns 0 on success, EEXIST if spa_lookup() already finds 'pool',
                          * EINVAL if zfs_allocatable_devs() rejects nvroot, or the error returned
                          * by spa_prop_validate(), spa_config_parse(), vdev_create() or
                          * spa_validate_aux().  spa_namespace_lock is taken on entry and dropped
                          * on every return path.
   2547    789    ahrens  */
   2548    789    ahrens int
   2549   5094     lling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
   2550   7184      timh     const char *history_str, nvlist_t *zplprops)
   2551    789    ahrens {
   2552    789    ahrens 	spa_t *spa;
   2553   5094     lling 	char *altroot = NULL;
   2554   1635   bonwick 	vdev_t *rvd;
   2555    789    ahrens 	dsl_pool_t *dp;
   2556    789    ahrens 	dmu_tx_t *tx;
   2557   9816    George 	int error = 0;
   2558    789    ahrens 	uint64_t txg = TXG_INITIAL;
   2559   5450   brendan 	nvlist_t **spares, **l2cache;
   2560   5450   brendan 	uint_t nspares, nl2cache;
   2561   5094     lling 	uint64_t version;
   2562    789    ahrens 
   2563    789    ahrens 	/*
   2564    789    ahrens 	 * If this pool already exists, return failure.
   2565    789    ahrens 	 */
   2566    789    ahrens 	mutex_enter(&spa_namespace_lock);
   2567    789    ahrens 	if (spa_lookup(pool) != NULL) {
   2568    789    ahrens 		mutex_exit(&spa_namespace_lock);
   2569    789    ahrens 		return (EEXIST);
   2570    789    ahrens 	}
   2571    789    ahrens 
   2572    789    ahrens 	/*
   2573    789    ahrens 	 * Allocate a new spa_t structure.
                         	 * The altroot lookup result is ignored; altroot stays NULL when
                         	 * the property is not found.
   2574    789    ahrens 	 */
   2575   5094     lling 	(void) nvlist_lookup_string(props,
   2576   5094     lling 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
   2577  10921       Tim 	spa = spa_add(pool, NULL, altroot);
   2578   8241      Jeff 	spa_activate(spa, spa_mode_global);
   2579   1601   bonwick 
                         	/* Reject bad properties before anything else is set up. */
   2580   5094     lling 	if (props && (error = spa_prop_validate(spa, props))) {
   2581   5094     lling 		spa_deactivate(spa);
   2582   5094     lling 		spa_remove(spa);
   2583   6643  eschrock 		mutex_exit(&spa_namespace_lock);
   2584   5094     lling 		return (error);
   2585   5094     lling 	}
   2586   5094     lling 
                         	/* Fall back to SPA_VERSION when no version property is supplied. */
   2587   5094     lling 	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
   2588   5094     lling 	    &version) != 0)
   2589   5094     lling 		version = SPA_VERSION;
   2590   5094     lling 	ASSERT(version <= SPA_VERSION);
   2591  10922      Jeff 
                         	/*
                         	 * Seed the in-core uberblock: it is stamped one txg behind the
                         	 * first txg (TXG_INITIAL) and with the chosen version, then
                         	 * copied into spa_ubsync.
                         	 */
   2592  10922      Jeff 	spa->spa_first_txg = txg;
   2593  10922      Jeff 	spa->spa_uberblock.ub_txg = txg - 1;
   2594   5094     lling 	spa->spa_uberblock.ub_version = version;
   2595    789    ahrens 	spa->spa_ubsync = spa->spa_uberblock;
   2596   9234    George 
   2597   9234    George 	/*
   2598   9234    George 	 * Create "The Godfather" zio to hold all async IOs
   2599   9234    George 	 */
   2600   9630      Jeff 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
   2601   9630      Jeff 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
   2602    789    ahrens 
   2603   1635   bonwick 	/*
   2604   1635   bonwick 	 * Create the root vdev.
   2605   1635   bonwick 	 */
   2606   7754      Jeff 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   2607   1635   bonwick 
   2608   2082  eschrock 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);
   2609   1635   bonwick 
   2610   2082  eschrock 	ASSERT(error != 0 || rvd != NULL);
   2611   2082  eschrock 	ASSERT(error != 0 || spa->spa_root_vdev == rvd);
   2612   1635   bonwick 
   2613   5913    perrin 	if (error == 0 && !zfs_allocatable_devs(nvroot))
   2614   1635   bonwick 		error = EINVAL;
   2615   2082  eschrock 
                         	/*
                         	 * Create the vdevs and validate the aux devices; only when both
                         	 * succeed is each child of the root vdev given its metaslab size
                         	 * and expanded.
                         	 */
   2616   2082  eschrock 	if (error == 0 &&
   2617   2082  eschrock 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
   2618   5450   brendan 	    (error = spa_validate_aux(spa, nvroot, txg,
   2619   2082  eschrock 	    VDEV_ALLOC_ADD)) == 0) {
   2620   9816    George 		for (int c = 0; c < rvd->vdev_children; c++) {
   2621   9816    George 			vdev_metaslab_set_size(rvd->vdev_child[c]);
   2622   9816    George 			vdev_expand(rvd->vdev_child[c], txg);
   2623   9816    George 		}
   2624   1635   bonwick 	}
   2625   1635   bonwick 
   2626   7754      Jeff 	spa_config_exit(spa, SCL_ALL, FTAG);
   2627    789    ahrens 
                         	/* Any failure above tears the half-built pool back down. */
   2628   2082  eschrock 	if (error != 0) {
   2629    789    ahrens 		spa_unload(spa);
   2630    789    ahrens 		spa_deactivate(spa);
   2631    789    ahrens 		spa_remove(spa);
   2632    789    ahrens 		mutex_exit(&spa_namespace_lock);
   2633    789    ahrens 		return (error);
   2634   2082  eschrock 	}
   2635   2082  eschrock 
   2636   2082  eschrock 	/*
   2637   2082  eschrock 	 * Get the list of spares, if specified.
   2638   2082  eschrock 	 */
   2639   2082  eschrock 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
   2640   2082  eschrock 	    &spares, &nspares) == 0) {
   2641   5450   brendan 		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
   2642   2082  eschrock 		    KM_SLEEP) == 0);
   2643   5450   brendan 		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
   2644   2082  eschrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
   2645   7754      Jeff 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   2646   7754      Jeff 		spa_load_spares(spa);
   2647   7754      Jeff 		spa_config_exit(spa, SCL_ALL, FTAG);
   2648   5450   brendan 		spa->spa_spares.sav_sync = B_TRUE;
   2649   5450   brendan 	}
   2650   5450   brendan 
   2651   5450   brendan 	/*
   2652   5450   brendan 	 * Get the list of level 2 cache devices, if specified.
   2653   5450   brendan 	 */
   2654   5450   brendan 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
   2655   5450   brendan 	    &l2cache, &nl2cache) == 0) {
   2656   5450   brendan 		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
   2657   5450   brendan 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
   2658   5450   brendan 		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
   2659   5450   brendan 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
   2660   7754      Jeff 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   2661   7754      Jeff 		spa_load_l2cache(spa);
   2662   7754      Jeff 		spa_config_exit(spa, SCL_ALL, FTAG);
   2663   5450   brendan 		spa->spa_l2cache.sav_sync = B_TRUE;
   2664    789    ahrens 	}
   2665    789    ahrens 
                         	/*
                         	 * Create the DSL pool and record its meta objset as the pool's
                         	 * spa_meta_objset.
                         	 */
   2666   7184      timh 	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
   2667    789    ahrens 	spa->spa_meta_objset = dp->dp_meta_objset;
   2668    789    ahrens 
   2669  10956    George 	/*
   2670  10956    George 	 * Create DDTs (dedup tables).
   2671  10956    George 	 */
   2672  10956    George 	ddt_create(spa);
   2673  10956    George 
   2674  10956    George 	spa_update_dspace(spa);
   2675  10956    George 
                         	/*
                         	 * Every object created below is created in this one
                         	 * transaction, which is assigned to the initial txg.
                         	 */
   2676    789    ahrens 	tx = dmu_tx_create_assigned(dp, txg);
   2677    789    ahrens 
   2678    789    ahrens 	/*
   2679    789    ahrens 	 * Create the pool config object.
   2680    789    ahrens 	 */
   2681    789    ahrens 	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
   2682   7497       Tim 	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
   2683    789    ahrens 	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
   2684    789    ahrens 
   2685   1544  eschrock 	if (zap_add(spa->spa_meta_objset,
   2686    789    ahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
   2687   1544  eschrock 	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
   2688   1544  eschrock 		cmn_err(CE_PANIC, "failed to add pool config");
   2689   2082  eschrock 	}
   2690   2082  eschrock 
   2691   5094     lling 	/* Newly created pools with the right version are always deflated. */
   2692   5094     lling 	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
   2693   5094     lling 		spa->spa_deflate = TRUE;
   2694   5094     lling 		if (zap_add(spa->spa_meta_objset,
   2695   5094     lling 		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
   2696   5094     lling 		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
   2697   5094     lling 			cmn_err(CE_PANIC, "failed to add deflate");
   2698   5094     lling 		}
   2699   1544  eschrock 	}
   2700    789    ahrens 
   2701    789    ahrens 	/*
   2702    789    ahrens 	 * Create the deferred-free bplist object.  Turn off compression
   2703    789    ahrens 	 * because sync-to-convergence takes longer if the blocksize
   2704    789    ahrens 	 * keeps changing.
   2705    789    ahrens 	 */
   2706  10922      Jeff 	spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset,
   2707    789    ahrens 	    1 << 14, tx);
   2708  10922      Jeff 	dmu_object_set_compress(spa->spa_meta_objset,
   2709  10922      Jeff 	    spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx);
   2710    789    ahrens 
   2711   1544  eschrock 	if (zap_add(spa->spa_meta_objset,
   2712    789    ahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
   2713  10922      Jeff 	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) {
   2714   1544  eschrock 		cmn_err(CE_PANIC, "failed to add bplist");
   2715   1544  eschrock 	}
   2716   2926  ek110237 
   2717   2926  ek110237 	/*
   2718   2926  ek110237 	 * Create the pool's history object.
   2719   2926  ek110237 	 */
   2720   5094     lling 	if (version >= SPA_VERSION_ZPOOL_HISTORY)
   2721   5094     lling 		spa_history_create_obj(spa, tx);
   2722   5094     lling 
   2723   5094     lling 	/*
   2724   5094     lling 	 * Set pool properties.
                         	 * In-core fields start from the property defaults; caller-supplied
                         	 * props, if any, are then applied in the same transaction.
   2725   5094     lling 	 */
   2726   5094     lling 	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
   2727   5094     lling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
   2728   5329   gw25295 	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
   2729   9816    George 	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
   2730  10922      Jeff 
   2731   8525      Eric 	if (props != NULL) {
   2732   8525      Eric 		spa_configfile_set(spa, props, B_FALSE);
   2733   5094     lling 		spa_sync_props(spa, props, CRED(), tx);
   2734   8525      Eric 	}
   2735    789    ahrens 
   2736    789    ahrens 	dmu_tx_commit(tx);
   2737    789    ahrens 
   2738    789    ahrens 	spa->spa_sync_on = B_TRUE;
   2739    789    ahrens 	txg_sync_start(spa->spa_dsl_pool);
   2740    789    ahrens 
   2741    789    ahrens 	/*
   2742    789    ahrens 	 * We explicitly wait for the first transaction to complete so that our
   2743    789    ahrens 	 * bean counters are appropriately updated.
   2744    789    ahrens 	 */
   2745    789    ahrens 	txg_wait_synced(spa->spa_dsl_pool, txg);
   2746    789    ahrens 
   2747   6643  eschrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
   2748   4715  ek110237 
                         	/* The command string is logged only when the caller supplied one. */
   2749   5094     lling 	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
   2750   4715  ek110237 		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
   2751   9946      Mark 	spa_history_log_version(spa, LOG_POOL_CREATE);
   2752    789    ahrens 
                         	/* Snapshot the current reference count into spa_minref. */
   2753   7046    ahrens 	spa->spa_minref = refcount_count(&spa->spa_refcount);
   2754   8667    George 
   2755   4451  eschrock 	mutex_exit(&spa_namespace_lock);
   2756    789    ahrens 
   2757    789    ahrens 	return (0);
   2758   6423   gw25295 }
   2759   6423   gw25295 
   2760   6423   gw25295 #ifdef _KERNEL
   2761   6423   gw25295 /*
   2762   9790       Lin  * Get the root pool information from the root disk, then import the root pool
   2763   9790       Lin  * during the system boot up time.
                          *
                          * vdev_disk_read_rootlabel() is not defined in this part of the file and is
                          * declared here for the root-pool routines below.  As used below, it takes
                          * a device path and devid, returns non-zero on failure, and on success
                          * stores an nvlist that the caller is responsible for freeing.
   2764   9790       Lin  */
   2765   9790       Lin extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
   2766   9790       Lin 
                         /*
                          * Build a pool configuration from the label of the given boot device.
                          *
                          * Reads the label via vdev_disk_read_rootlabel(), wraps the label's
                          * vdev tree in a freshly built root vdev (id 0, guid = the pool guid) and
                          * stores that root vdev back into the label as its vdev tree.
                          *
                          * On success returns the modified label nvlist and stores the label's
                          * ZPOOL_CONFIG_GUID in *guid.  Returns NULL if the label cannot be read.
                          * The only caller visible here releases the result with nvlist_free().
                          */
   2767   9790       Lin static nvlist_t *
   2768   9790       Lin spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
   2769   9790       Lin {
   2770   9790       Lin 	nvlist_t *config;
   2771   6423   gw25295 	nvlist_t *nvtop, *nvroot;
   2772   6423   gw25295 	uint64_t pgid;
   2773   6423   gw25295 
   2774   9790       Lin 	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
   2775   9790       Lin 		return (NULL);
   2776   9790       Lin 
   2777   6423   gw25295 	/*
   2778   6423   gw25295 	 * Add this top-level vdev to the child array.
   2779   6423   gw25295 	 */
   2780   9790       Lin 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
   2781   9790       Lin 	    &nvtop) == 0);
   2782   9790       Lin 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
   2783   9790       Lin 	    &pgid) == 0);
   2784   9790       Lin 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);
   2785   6423   gw25295 
   2786   6423   gw25295 	/*
   2787   6423   gw25295 	 * Put this pool's top-level vdevs into a root vdev.
   2788   6423   gw25295 	 */
   2789   6423   gw25295 	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
   2790   9790       Lin 	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
   2791   9790       Lin 	    VDEV_TYPE_ROOT) == 0);
   2792   6423   gw25295 	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
   2793   6423   gw25295 	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
   2794   6423   gw25295 	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
   2795   6423   gw25295 	    &nvtop, 1) == 0);
   2796   6423   gw25295 
   2797   6423   gw25295 	/*
   2798   6423   gw25295 	 * Replace the existing vdev_tree with the new root vdev in
   2799   6423   gw25295 	 * this pool's configuration (remove the old, add the new).
   2800   6423   gw25295 	 */
   2801   6423   gw25295 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
                         	/* The local nvroot is released once it has been added to config. */
   2802   6423   gw25295 	nvlist_free(nvroot);
   2803   9790       Lin 	return (config);
   2804   9790       Lin }
   2805   9790       Lin 
   2806   9790       Lin /*
   2807   9790       Lin  * Walk the vdev tree and see if we can find a device with "better"
   2808   9790       Lin  * configuration. A configuration is "better" if the label on that
   2809   9790       Lin  * device has a more recent txg.
                          *
                          *	vd	root of the (sub)tree to search; children are visited
                          *		recursively before vd itself is examined
                          *	avd	in/out: updated to point at a leaf whose label txg is
                          *		greater than *txg
                          *	txg	in/out: the best txg seen so far; the caller seeds it
                          *
                          * Leaves whose label cannot be read are silently skipped.
   2810   9790       Lin  */
   2811   9790       Lin static void
   2812   9790       Lin spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
   2813   9790       Lin {
   2814   9816    George 	for (int c = 0; c < vd->vdev_children; c++)
   2815   9790       Lin 		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
   2816   9790       Lin 
   2817   9790       Lin 	if (vd->vdev_ops->vdev_op_leaf) {
   2818   9790       Lin 		nvlist_t *label;
   2819   9790       Lin 		uint64_t label_txg;
   2820   9790       Lin 
   2821   9790       Lin 		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
   2822   9790       Lin 		    &label) != 0)
   2823   9790       Lin 			return;
   2824   9790       Lin 
   2825   9790       Lin 		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
   2826   9790       Lin 		    &label_txg) == 0);
   2827   9790       Lin 
   2828   9790       Lin 		/*
   2829   9790       Lin 		 * Do we have a better boot device?
                         		 * Strictly greater: a leaf with an equal txg does not
                         		 * displace the current candidate.
   2830   9790       Lin 		 */
   2831   9790       Lin 		if (label_txg > *txg) {
   2832   9790       Lin 			*txg = label_txg;
   2833   9790       Lin 			*avd = vd;
   2834   9790       Lin 		}
   2835   9790       Lin 		nvlist_free(label);
   2836   9790       Lin 	}
   2837   7147    taylor }
   2838   7147    taylor 
   2839   6423   gw25295 /*
   2840   6423   gw25295  * Import a root pool.
   2841   6423   gw25295  *
   2842   7147    taylor  * For x86. devpath_list will consist of devid and/or physpath name of
   2843   7147    taylor  * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
   2844   7147    taylor  * The GRUB "findroot" command will return the vdev we should boot.
   2845   6423   gw25295  *
   2846   6423   gw25295  * For Sparc, devpath_list consists the physpath name of the booting device
   2847   6423   gw25295  * no matter the rootpool is a single device pool or a mirrored pool.
   2848   6423   gw25295  * e.g.
   2849   6423   gw25295  *	"/pci@1f,0/ide@d/disk@0,0:a"
                          *
                          * devpath and devid identify the boot device; both are handed to
                          * spa_generate_rootconf() to read its label.  The pool is added to the
                          * namespace (replacing any existing entry of the same name) and marked
                          * spa_is_root / spa_load_verbatim.  The vdev tree parsed here is used
                          * only for the boot-device checks and is freed before returning.
                          *
                          * Returns 0 on success, EIO if no label could be read, ENOENT if the
                          * label's guid is not found in the parsed vdev tree, EINVAL if a
                          * different device should be booted from instead, or the error returned
                          * by spa_config_parse().
   2850   6423   gw25295  */
   2851   6423   gw25295 int
   2852   7147    taylor spa_import_rootpool(char *devpath, char *devid)
   2853   6423   gw25295 {
   2854   9790       Lin 	spa_t *spa;
   2855   9790       Lin 	vdev_t *rvd, *bvd, *avd = NULL;
   2856   9790       Lin 	nvlist_t *config, *nvtop;
   2857   9790       Lin 	uint64_t guid, txg;
   2858   6423   gw25295 	char *pname;
   2859   6423   gw25295 	int error;
   2860   9790       Lin 
   2861   9790       Lin 	/*
   2862   9790       Lin 	 * Read the label from the boot device and generate a configuration.
   2863   9790       Lin 	 */
   2864  10822      Jack 	config = spa_generate_rootconf(devpath, devid, &guid);
   2865  10822      Jack #if defined(_OBP) && defined(_KERNEL)
   2866  10822      Jack 	if (config == NULL) {
   2867  10822      Jack 		if (strstr(devpath, "/iscsi/ssd") != NULL) {
   2868  10822      Jack 			/* iscsi boot */
                         			/*
                         			 * NOTE(review): get_iscsi_bootpath_phy() presumably
                         			 * rewrites devpath in place before the retry --
                         			 * confirm against its definition.
                         			 */
   2869  10822      Jack 			get_iscsi_bootpath_phy(devpath);
   2870  10822      Jack 			config = spa_generate_rootconf(devpath, devid, &guid);
   2871  10822      Jack 		}
   2872  10822      Jack 	}
   2873  10822      Jack #endif
   2874  10822      Jack 	if (config == NULL) {
   2875   9790       Lin 		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
   2876   9790       Lin 		    devpath);
   2877   9790       Lin 		return (EIO);
   2878   9790       Lin 	}
   2879   9790       Lin 
                         	/*
                         	 * pname points at storage owned by config and is only valid
                         	 * until config is freed.
                         	 */
   2880   9790       Lin 	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
   2881   9790       Lin 	    &pname) == 0);
   2882   9790       Lin 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
   2883   6423   gw25295 
   2884   9425      Eric 	mutex_enter(&spa_namespace_lock);
   2885   9425      Eric 	if ((spa = spa_lookup(pname)) != NULL) {
   2886   9425      Eric 		/*
   2887   9425      Eric 		 * Remove the existing root pool from the namespace so that we
   2888   9425      Eric 		 * can replace it with the correct config we just read in.
   2889   9425      Eric 		 */
   2890   9425      Eric 		spa_remove(spa);
   2891   9425      Eric 	}
   2892   9425      Eric 
   2893  10921       Tim 	spa = spa_add(pname, config, NULL);
   2894   9425      Eric 	spa->spa_is_root = B_TRUE;
   2895  10100       Lin 	spa->spa_load_verbatim = B_TRUE;
   2896   9790       Lin 
   2897   9790       Lin 	/*
   2898   9790       Lin 	 * Build up a vdev tree based on the boot device's label config.
   2899   9790       Lin 	 */
   2900   9790       Lin 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
   2901   9790       Lin 	    &nvtop) == 0);
   2902   9790       Lin 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   2903   9790       Lin 	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
   2904   9790       Lin 	    VDEV_ALLOC_ROOTPOOL);
   2905   9790       Lin 	spa_config_exit(spa, SCL_ALL, FTAG);
   2906   9790       Lin 	if (error) {
   2907   9790       Lin 		mutex_exit(&spa_namespace_lock);
                         		/*
                         		 * Report the failure before freeing config: pname lives
                         		 * inside config, so logging it after nvlist_free() would
                         		 * read freed memory.
                         		 */
   2909   9790       Lin 		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
   2910   9790       Lin 		    pname);
   2908   9790       Lin 		nvlist_free(config);
   2911   9790       Lin 		return (error);
   2912   9790       Lin 	}
   2913   9790       Lin 
   2914   9790       Lin 	/*
   2915   9790       Lin 	 * Get the boot vdev.
   2916   9790       Lin 	 */
   2917   9790       Lin 	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
   2918   9790       Lin 		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
   2919   9790       Lin 		    (u_longlong_t)guid);
   2920   9790       Lin 		error = ENOENT;
   2921   9790       Lin 		goto out;
   2922   9790       Lin 	}
   2923   9790       Lin 
   2924   9790       Lin 	/*
   2925   9790       Lin 	 * Determine if there is a better boot device.
                         	 * avd starts as the boot vdev and txg as the boot label's txg;
                         	 * spa_alt_rootvdev() moves avd only if some leaf has a newer txg.
   2926   9790       Lin 	 */
   2927   9790       Lin 	avd = bvd;
   2928   9790       Lin 	spa_alt_rootvdev(rvd, &avd, &txg);
   2929   9790       Lin 	if (avd != bvd) {
   2930   9790       Lin 		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
   2931   9790       Lin 		    "try booting from '%s'", avd->vdev_path);
   2932   9790       Lin 		error = EINVAL;
   2933   9790       Lin 		goto out;
   2934   9790       Lin 	}
   2935   9790       Lin 
   2936   9790       Lin 	/*
   2937   9790       Lin 	 * If the boot device is part of a spare vdev then ensure that
   2938   9790       Lin 	 * we're booting off the active spare.
   2939   9790       Lin 	 */
   2940   9790       Lin 	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
   2941   9790       Lin 	    !bvd->vdev_isspare) {
   2942   9790       Lin 		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
   2943   9790       Lin 		    "try booting from '%s'",
   2944   9790       Lin 		    bvd->vdev_parent->vdev_child[1]->vdev_path);
   2945   9790       Lin 		error = EINVAL;
   2946   9790       Lin 		goto out;
   2947   9790       Lin 	}
   2948   9790       Lin 
   2949   9790       Lin 	error = 0;
   2950   9946      Mark 	spa_history_log_version(spa, LOG_POOL_IMPORT);
   2951   9790       Lin out:
                         	/*
                         	 * Common exit: the temporary vdev tree is freed under the config
                         	 * lock, then the namespace lock and the label config are released.
                         	 */
   2952   9790       Lin 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
   2953   9790       Lin 	vdev_free(rvd);
   2954   9790       Lin 	spa_config_exit(spa, SCL_ALL, FTAG);
   2955   9790       Lin 	mutex_exit(&spa_namespace_lock);
   2956   9790       Lin 
   2957   9790       Lin 	nvlist_free(config);
   2958   9790       Lin 	return (error);
   2959   9790       Lin }
   2960   9790       Lin 
   2961   6423   gw25295 #endif
   2962   6423   gw25295 
   2963   6423   gw25295 /*
   2964   9425      Eric  * Take a pool and insert it into the namespace as if it had been loaded at
   2965   9425      Eric  * boot.
                          *
                          *	pool	name under which the pool is added
                          *	config	configuration handed to spa_add(); also the source of
                          *		the rewind policy
                          *	props	pool properties; supplies the altroot, and the config
                          *		file setting when non-NULL
                          *
                          * Returns EEXIST if spa_lookup() already finds 'pool', otherwise 0.
                          * No pool load is started here; the entry is only added, flagged
                          * spa_load_verbatim, and written out with spa_config_sync().
   2966   9425      Eric  */
   2967   9425      Eric int
   2968   9425      Eric spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
   2969   9425      Eric {
   2970   9425      Eric 	spa_t *spa;
   2971  10921       Tim 	zpool_rewind_policy_t policy;
   2972   9425      Eric 	char *altroot = NULL;
   2973   9425      Eric 
   2974   9425      Eric 	mutex_enter(&spa_namespace_lock);
   2975   9425      Eric 	if (spa_lookup(pool) != NULL) {
   2976   9425      Eric 		mutex_exit(&spa_namespace_lock);
   2977   9425      Eric 		return (EEXIST);
   2978   9425      Eric 	}
   2979   9425      Eric 
                         	/* Lookup failure is ignored; altroot then stays NULL. */
   2980   9425      Eric 	(void) nvlist_lookup_string(props,
   2981   9425      Eric 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
   2982  10921       Tim 	spa = spa_add(pool, config, altroot);
   2983  10921       Tim 
                         	/* Carry the rewind policy's txg over as the load limit. */
   2984  10921       Tim 	zpool_get_rewind_policy(config, &policy);
   2985  10921       Tim 	spa->spa_load_max_txg = policy.zrp_txg;
   2986   9425      Eric 
   2987  10100       Lin 	spa->spa_load_verbatim = B_TRUE;
   2988   9425      Eric 
   2989   9425      Eric 	if (props != NULL)
   2990   9425      Eric 		spa_configfile_set(spa, props, B_FALSE);
   2991   9425      Eric 
   2992   9425      Eric 	spa_config_sync(spa, B_FALSE, B_TRUE);
   2993   9425      Eric 
   2994   9425      Eric 	mutex_exit(&spa_namespace_lock);
                         	/* The history entry is written after the namespace lock is dropped. */
   2995   9946      Mark 	spa_history_log_version(spa, LOG_POOL_IMPORT);
   2996   9425      Eric 
   2997   9425      Eric 	return (0);
   2998   9425      Eric }
   2999   9425      Eric 
   3000   9425      Eric /*
   3001   6423   gw25295  * Import a non-root pool into the system.
   3002   6423   gw25295  */
   3003   6423   gw25295 int
   3004   6423   gw25295 spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
   3005   6423   gw25295 {
   3006   9425      Eric 	spa_t *spa;
   3007   9425      Eric 	char *altroot = NULL;
   3008  10921       Tim 	spa_load_state_t state = SPA_LOAD_IMPORT;
   3009  10921       Tim 	zpool_rewind_policy_t policy;
   3010   9425      Eric 	int error;
   3011   9425      Eric 	nvlist_t *nvroot;
   3012   9425      Eric 	nvlist_t **spares, **l2cache;
   3013   9425      Eric 	uint_t nspares, nl2cache;
   3014   9425      Eric 
   3015   9425      Eric 	/*
   3016 <