1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 2082 eschrock 22 789 ahrens /* 23 8525 Eric * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 789 ahrens * Use is subject to license terms. 25 789 ahrens */ 26 789 ahrens 27 789 ahrens /* 28 789 ahrens * This file contains all the routines used when modifying on-disk SPA state. 29 789 ahrens * This includes opening, importing, destroying, exporting a pool, and syncing a 30 789 ahrens * pool. 31 789 ahrens */ 32 789 ahrens 33 789 ahrens #include <sys/zfs_context.h> 34 1544 eschrock #include <sys/fm/fs/zfs.h> 35 789 ahrens #include <sys/spa_impl.h> 36 789 ahrens #include <sys/zio.h> 37 789 ahrens #include <sys/zio_checksum.h> 38 789 ahrens #include <sys/dmu.h> 39 789 ahrens #include <sys/dmu_tx.h> 40 789 ahrens #include <sys/zap.h> 41 789 ahrens #include <sys/zil.h> 42 10922 Jeff #include <sys/ddt.h> 43 789 ahrens #include <sys/vdev_impl.h> 44 789 ahrens #include <sys/metaslab.h> 45 10594 George #include <sys/metaslab_impl.h> 46 789 ahrens #include <sys/uberblock_impl.h> 47 789 ahrens #include <sys/txg.h> 48 789 ahrens #include <sys/avl.h> 49 789 ahrens #include <sys/dmu_traverse.h> 50 3912 lling #include <sys/dmu_objset.h> 51 789 ahrens #include <sys/unique.h> 52 789 ahrens #include <sys/dsl_pool.h> 53 3912 lling #include <sys/dsl_dataset.h> 54 789 ahrens #include <sys/dsl_dir.h> 55 789 ahrens #include <sys/dsl_prop.h> 56 3912 lling #include <sys/dsl_synctask.h> 57 789 ahrens #include <sys/fs/zfs.h> 58 5450 brendan #include <sys/arc.h> 59 789 ahrens #include <sys/callb.h> 60 3975 ek110237 #include <sys/systeminfo.h> 61 6423 gw25295 #include <sys/spa_boot.h> 62 9816 George #include <sys/zfs_ioctl.h> 63 2986 ek110237 64 8662 Jordan #ifdef _KERNEL 65 11173 Jonathan #include <sys/bootprops.h> 66 11173 Jonathan #include <sys/callb.h> 67 11173 Jonathan #include <sys/cpupart.h> 68 11173 Jonathan #include <sys/pool.h> 69 11173 Jonathan #include <sys/sysdc.h> 70 8662 Jordan #include <sys/zone.h> 71 8662 Jordan #endif /* _KERNEL */ 72 8662 Jordan 73 5094 lling #include "zfs_prop.h" 74 5913 perrin #include "zfs_comutil.h" 75 5094 lling 76 11173 Jonathan typedef enum zti_modes { 77 9515 Jonathan zti_mode_fixed, /* value is # of threads (min 1) */ 78 9515 Jonathan zti_mode_online_percent, /* value is % of online CPUs */ 79 11173 Jonathan zti_mode_batch, /* cpu-intensive; value is ignored */ 80 11146 George zti_mode_null, /* don't create a taskq */ 81 9515 Jonathan zti_nmodes 82 11173 Jonathan } zti_modes_t; 83 9515 Jonathan 84 11146 George #define ZTI_FIX(n) { zti_mode_fixed, (n) } 85 11146 George #define ZTI_PCT(n) { zti_mode_online_percent, (n) } 86 11173 Jonathan #define ZTI_BATCH { zti_mode_batch, 0 } 87 11146 George #define ZTI_NULL { zti_mode_null, 0 } 88 11146 George 89 11146 George #define ZTI_ONE ZTI_FIX(1) 90 9515 Jonathan 91 9515 Jonathan typedef struct zio_taskq_info { 92 11146 George enum zti_modes zti_mode; 93 11146 George uint_t zti_value; 94 9515 Jonathan } zio_taskq_info_t; 95 9515 Jonathan 96 9515 Jonathan static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 97 11173 Jonathan "issue", "issue_high", "intr", "intr_high" 98 9515 Jonathan }; 99 9515 Jonathan 100 11146 George /* 101 11146 George * Define the taskq threads for the following I/O types: 102 11146 George * NULL, READ, WRITE, FREE, CLAIM, and IOCTL 103 11146 George */ 104 11146 George const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 105 11146 George /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 106 11146 George { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 107 11173 Jonathan { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, 108 11173 Jonathan { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, 109 11146 George { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 110 11146 George { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 111 11146 George { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 112 9515 Jonathan }; 113 9515 Jonathan 114 5094 lling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 115 7214 lling static boolean_t spa_has_active_shared_spare(spa_t *spa); 116 11173 Jonathan 117 11173 Jonathan uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ 118 11173 Jonathan id_t zio_taskq_psrset_bind = PS_NONE; 119 11173 Jonathan boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ 120 11173 Jonathan uint_t zio_taskq_basedc = 80; /* base duty cycle */ 121 11173 Jonathan 122 11173 Jonathan boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ 123 11173 Jonathan 124 11173 Jonathan /* 125 11173 Jonathan * This (illegal) pool name is used when temporarily importing a spa_t in order 126 11173 Jonathan * to get the vdev stats associated with the imported devices. 127 11173 Jonathan */ 128 11173 Jonathan #define TRYIMPORT_NAME "$import" 129 5094 lling 130 5094 lling /* 131 5094 lling * ========================================================================== 132 5094 lling * SPA properties routines 133 5094 lling * ========================================================================== 134 5094 lling */ 135 5094 lling 136 5094 lling /* 137 5094 lling * Add a (source=src, propname=propval) list to an nvlist. 138 5094 lling */ 139 5949 lling static void 140 5094 lling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 141 5094 lling uint64_t intval, zprop_source_t src) 142 5094 lling { 143 5094 lling const char *propname = zpool_prop_to_name(prop); 144 5094 lling nvlist_t *propval; 145 5949 lling 146 5949 lling VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 147 5949 lling VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 148 5949 lling 149 5949 lling if (strval != NULL) 150 5949 lling VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 151 5949 lling else 152 5949 lling VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 153 5949 lling 154 5949 lling VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 155 5094 lling nvlist_free(propval); 156 5094 lling } 157 5094 lling 158 5094 lling /* 159 5094 lling * Get property values from the spa configuration. 160 5094 lling */ 161 5949 lling static void 162 5094 lling spa_prop_get_config(spa_t *spa, nvlist_t **nvp) 163 5094 lling { 164 8525 Eric uint64_t size; 165 10956 George uint64_t alloc; 166 5094 lling uint64_t cap, version; 167 5094 lling zprop_source_t src = ZPROP_SRC_NONE; 168 6643 eschrock spa_config_dirent_t *dp; 169 5094 lling 170 7754 Jeff ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 171 7754 Jeff 172 8525 Eric if (spa->spa_root_vdev != NULL) { 173 10956 George alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 174 10922 Jeff size = metaslab_class_get_space(spa_normal_class(spa)); 175 8525 Eric spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 176 8525 Eric spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 177 10956 George spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 178 10956 George spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 179 10956 George size - alloc, src); 180 10956 George 181 10956 George cap = (size == 0) ? 0 : (alloc * 100 / size); 182 8525 Eric spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 183 10922 Jeff 184 10922 Jeff spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 185 10922 Jeff ddt_get_pool_dedup_ratio(spa), src); 186 8525 Eric 187 8525 Eric spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 188 8525 Eric spa->spa_root_vdev->vdev_state, src); 189 8525 Eric 190 8525 Eric version = spa_version(spa); 191 8525 Eric if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 192 8525 Eric src = ZPROP_SRC_DEFAULT; 193 8525 Eric else 194 8525 Eric src = ZPROP_SRC_LOCAL; 195 8525 Eric spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 196 8525 Eric } 197 5949 lling 198 5949 lling spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 199 5949 lling 200 5949 lling if (spa->spa_root != NULL) 201 5949 lling spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 202 5949 lling 0, ZPROP_SRC_LOCAL); 203 5094 lling 204 6643 eschrock if ((dp = list_head(&spa->spa_config_list)) != NULL) { 205 6643 eschrock if (dp->scd_path == NULL) { 206 5949 lling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 207 6643 eschrock "none", 0, ZPROP_SRC_LOCAL); 208 6643 eschrock } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 209 5949 lling spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 210 6643 eschrock dp->scd_path, 0, ZPROP_SRC_LOCAL); 211 5363 eschrock } 212 5949 lling } 213 5094 lling } 214 5094 lling 215 5094 lling /* 216 5094 lling * Get zpool property values. 217 5094 lling */ 218 5094 lling int 219 5094 lling spa_prop_get(spa_t *spa, nvlist_t **nvp) 220 5094 lling { 221 10922 Jeff objset_t *mos = spa->spa_meta_objset; 222 5094 lling zap_cursor_t zc; 223 5094 lling zap_attribute_t za; 224 5094 lling int err; 225 5094 lling 226 5949 lling VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 227 5094 lling 228 7754 Jeff mutex_enter(&spa->spa_props_lock); 229 7754 Jeff 230 5094 lling /* 231 5094 lling * Get properties from the spa config. 232 5094 lling */ 233 5949 lling spa_prop_get_config(spa, nvp); 234 5094 lling 235 5094 lling /* If no pool property object, no more prop to get. */ 236 5094 lling if (spa->spa_pool_props_object == 0) { 237 5094 lling mutex_exit(&spa->spa_props_lock); 238 5094 lling return (0); 239 5094 lling } 240 5094 lling 241 5094 lling /* 242 5094 lling * Get properties from the MOS pool property object. 243 5094 lling */ 244 5094 lling for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 245 5094 lling (err = zap_cursor_retrieve(&zc, &za)) == 0; 246 5094 lling zap_cursor_advance(&zc)) { 247 5094 lling uint64_t intval = 0; 248 5094 lling char *strval = NULL; 249 5094 lling zprop_source_t src = ZPROP_SRC_DEFAULT; 250 5094 lling zpool_prop_t prop; 251 5094 lling 252 5094 lling if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 253 5094 lling continue; 254 5094 lling 255 5094 lling switch (za.za_integer_length) { 256 5094 lling case 8: 257 5094 lling /* integer property */ 258 5094 lling if (za.za_first_integer != 259 5094 lling zpool_prop_default_numeric(prop)) 260 5094 lling src = ZPROP_SRC_LOCAL; 261 5094 lling 262 5094 lling if (prop == ZPOOL_PROP_BOOTFS) { 263 5094 lling dsl_pool_t *dp; 264 5094 lling dsl_dataset_t *ds = NULL; 265 5094 lling 266 5094 lling dp = spa_get_dsl(spa); 267 5094 lling rw_enter(&dp->dp_config_rwlock, RW_READER); 268 6689 maybee if (err = dsl_dataset_hold_obj(dp, 269 6689 maybee za.za_first_integer, FTAG, &ds)) { 270 5094 lling rw_exit(&dp->dp_config_rwlock); 271 5094 lling break; 272 5094 lling } 273 5094 lling 274 5094 lling strval = kmem_alloc( 275 5094 lling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 276 5094 lling KM_SLEEP); 277 5094 lling dsl_dataset_name(ds, strval); 278 6689 maybee dsl_dataset_rele(ds, FTAG); 279 5094 lling rw_exit(&dp->dp_config_rwlock); 280 5094 lling } else { 281 5094 lling strval = NULL; 282 5094 lling intval = za.za_first_integer; 283 5094 lling } 284 5094 lling 285 5949 lling spa_prop_add_list(*nvp, prop, strval, intval, src); 286 5094 lling 287 5094 lling if (strval != NULL) 288 5094 lling kmem_free(strval, 289 5094 lling MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 290 5094 lling 291 5094 lling break; 292 5094 lling 293 5094 lling case 1: 294 5094 lling /* string property */ 295 5094 lling strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 296 5094 lling err = zap_lookup(mos, spa->spa_pool_props_object, 297 5094 lling za.za_name, 1, za.za_num_integers, strval); 298 5094 lling if (err) { 299 5094 lling kmem_free(strval, za.za_num_integers); 300 5094 lling break; 301 5094 lling } 302 5949 lling spa_prop_add_list(*nvp, prop, strval, 0, src); 303 5094 lling kmem_free(strval, za.za_num_integers); 304 5094 lling break; 305 5094 lling 306 5094 lling default: 307 5094 lling break; 308 5094 lling } 309 5094 lling } 310 5094 lling zap_cursor_fini(&zc); 311 5094 lling mutex_exit(&spa->spa_props_lock); 312 5094 lling out: 313 5094 lling if (err && err != ENOENT) { 314 5094 lling nvlist_free(*nvp); 315 5949 lling *nvp = NULL; 316 5094 lling return (err); 317 5094 lling } 318 5094 lling 319 5094 lling return (0); 320 5094 lling } 321 5094 lling 322 5094 lling /* 323 5094 lling * Validate the given pool properties nvlist and modify the list 324 5094 lling * for the property values to be set. 325 5094 lling */ 326 5094 lling static int 327 5094 lling spa_prop_validate(spa_t *spa, nvlist_t *props) 328 5094 lling { 329 5094 lling nvpair_t *elem; 330 5094 lling int error = 0, reset_bootfs = 0; 331 5094 lling uint64_t objnum; 332 5094 lling 333 5094 lling elem = NULL; 334 5094 lling while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 335 5094 lling zpool_prop_t prop; 336 5094 lling char *propname, *strval; 337 5094 lling uint64_t intval; 338 5094 lling objset_t *os; 339 5363 eschrock char *slash; 340 5094 lling 341 5094 lling propname = nvpair_name(elem); 342 5094 lling 343 5094 lling if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 344 5094 lling return (EINVAL); 345 5094 lling 346 5094 lling switch (prop) { 347 5094 lling case ZPOOL_PROP_VERSION: 348 5094 lling error = nvpair_value_uint64(elem, &intval); 349 5094 lling if (!error && 350 5094 lling (intval < spa_version(spa) || intval > SPA_VERSION)) 351 5094 lling error = EINVAL; 352 5094 lling break; 353 5094 lling 354 5094 lling case ZPOOL_PROP_DELEGATION: 355 5094 lling case ZPOOL_PROP_AUTOREPLACE: 356 7538 Richard case ZPOOL_PROP_LISTSNAPS: 357 9816 George case ZPOOL_PROP_AUTOEXPAND: 358 5094 lling error = nvpair_value_uint64(elem, &intval); 359 5094 lling if (!error && intval > 1) 360 5094 lling error = EINVAL; 361 5094 lling break; 362 5094 lling 363 5094 lling case ZPOOL_PROP_BOOTFS: 364 9630 Jeff /* 365 9630 Jeff * If the pool version is less than SPA_VERSION_BOOTFS, 366 9630 Jeff * or the pool is still being created (version == 0), 367 9630 Jeff * the bootfs property cannot be set. 368 9630 Jeff */ 369 5094 lling if (spa_version(spa) < SPA_VERSION_BOOTFS) { 370 5094 lling error = ENOTSUP; 371 5094 lling break; 372 5094 lling } 373 5094 lling 374 5094 lling /* 375 7042 gw25295 * Make sure the vdev config is bootable 376 7042 gw25295 */ 377 7042 gw25295 if (!vdev_is_bootable(spa->spa_root_vdev)) { 378 5094 lling error = ENOTSUP; 379 5094 lling break; 380 5094 lling } 381 5094 lling 382 5094 lling reset_bootfs = 1; 383 5094 lling 384 5094 lling error = nvpair_value_string(elem, &strval); 385 5094 lling 386 5094 lling if (!error) { 387 7042 gw25295 uint64_t compress; 388 7042 gw25295 389 5094 lling if (strval == NULL || strval[0] == '\0') { 390 5094 lling objnum = zpool_prop_default_numeric( 391 5094 lling ZPOOL_PROP_BOOTFS); 392 5094 lling break; 393 5094 lling } 394 5094 lling 395 10298 Matthew if (error = dmu_objset_hold(strval, FTAG, &os)) 396 10298 Matthew break; 397 10298 Matthew 398 10298 Matthew /* Must be ZPL and not gzip compressed. */ 399 10298 Matthew 400 10298 Matthew if (dmu_objset_type(os) != DMU_OST_ZFS) { 401 10298 Matthew error = ENOTSUP; 402 10298 Matthew } else if ((error = dsl_prop_get_integer(strval, 403 7042 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 404 7042 gw25295 &compress, NULL)) == 0 && 405 7042 gw25295 !BOOTFS_COMPRESS_VALID(compress)) { 406 7042 gw25295 error = ENOTSUP; 407 7042 gw25295 } else { 408 7042 gw25295 objnum = dmu_objset_id(os); 409 7042 gw25295 } 410 10298 Matthew dmu_objset_rele(os, FTAG); 411 5094 lling } 412 5094 lling break; 413 7754 Jeff 414 5329 gw25295 case ZPOOL_PROP_FAILUREMODE: 415 5329 gw25295 error = nvpair_value_uint64(elem, &intval); 416 5329 gw25295 if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 417 5329 gw25295 intval > ZIO_FAILURE_MODE_PANIC)) 418 5329 gw25295 error = EINVAL; 419 5329 gw25295 420 5329 gw25295 /* 421 5329 gw25295 * This is a special case which only occurs when 422 5329 gw25295 * the pool has completely failed. This allows 423 5329 gw25295 * the user to change the in-core failmode property 424 5329 gw25295 * without syncing it out to disk (I/Os might 425 5329 gw25295 * currently be blocked). We do this by returning 426 5329 gw25295 * EIO to the caller (spa_prop_set) to trick it 427 5329 gw25295 * into thinking we encountered a property validation 428 5329 gw25295 * error. 429 5329 gw25295 */ 430 7754 Jeff if (!error && spa_suspended(spa)) { 431 5329 gw25295 spa->spa_failmode = intval; 432 5329 gw25295 error = EIO; 433 5329 gw25295 } 434 5363 eschrock break; 435 5363 eschrock 436 5363 eschrock case ZPOOL_PROP_CACHEFILE: 437 5363 eschrock if ((error = nvpair_value_string(elem, &strval)) != 0) 438 5363 eschrock break; 439 5363 eschrock 440 5363 eschrock if (strval[0] == '\0') 441 5363 eschrock break; 442 5363 eschrock 443 5363 eschrock if (strcmp(strval, "none") == 0) 444 5363 eschrock break; 445 5363 eschrock 446 5363 eschrock if (strval[0] != '/') { 447 5363 eschrock error = EINVAL; 448 5363 eschrock break; 449 5363 eschrock } 450 5363 eschrock 451 5363 eschrock slash = strrchr(strval, '/'); 452 5363 eschrock ASSERT(slash != NULL); 453 5363 eschrock 454 5363 eschrock if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 455 5363 eschrock strcmp(slash, "/..") == 0) 456 5363 eschrock error = EINVAL; 457 5329 gw25295 break; 458 10922 Jeff 459 10922 Jeff case ZPOOL_PROP_DEDUPDITTO: 460 10922 Jeff if (spa_version(spa) < SPA_VERSION_DEDUP) 461 10922 Jeff error = ENOTSUP; 462 10922 Jeff else 463 10922 Jeff error = nvpair_value_uint64(elem, &intval); 464 10922 Jeff if (error == 0 && 465 10922 Jeff intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 466 10922 Jeff error = EINVAL; 467 10922 Jeff break; 468 5094 lling } 469 5094 lling 470 5094 lling if (error) 471 5094 lling break; 472 5094 lling } 473 5094 lling 474 5094 lling if (!error && reset_bootfs) { 475 5094 lling error = nvlist_remove(props, 476 5094 lling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 477 5094 lling 478 5094 lling if (!error) { 479 5094 lling error = nvlist_add_uint64(props, 480 5094 lling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 481 5094 lling } 482 5094 lling } 483 5094 lling 484 5094 lling return (error); 485 5094 lling } 486 5094 lling 487 8525 Eric void 488 8525 Eric spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 489 8525 Eric { 490 8525 Eric char *cachefile; 491 8525 Eric spa_config_dirent_t *dp; 492 8525 Eric 493 8525 Eric if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 494 8525 Eric &cachefile) != 0) 495 8525 Eric return; 496 8525 Eric 497 8525 Eric dp = kmem_alloc(sizeof (spa_config_dirent_t), 498 8525 Eric KM_SLEEP); 499 8525 Eric 500 8525 Eric if (cachefile[0] == '\0') 501 8525 Eric dp->scd_path = spa_strdup(spa_config_path); 502 8525 Eric else if (strcmp(cachefile, "none") == 0) 503 8525 Eric dp->scd_path = NULL; 504 8525 Eric else 505 8525 Eric dp->scd_path = spa_strdup(cachefile); 506 8525 Eric 507 8525 Eric list_insert_head(&spa->spa_config_list, dp); 508 8525 Eric if (need_sync) 509 8525 Eric spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 510 8525 Eric } 511 8525 Eric 512 5094 lling int 513 5094 lling spa_prop_set(spa_t *spa, nvlist_t *nvp) 514 5094 lling { 515 5094 lling int error; 516 8525 Eric nvpair_t *elem; 517 8525 Eric boolean_t need_sync = B_FALSE; 518 8525 Eric zpool_prop_t prop; 519 5094 lling 520 5094 lling if ((error = spa_prop_validate(spa, nvp)) != 0) 521 5094 lling return (error); 522 5094 lling 523 8525 Eric elem = NULL; 524 8525 Eric while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 525 8525 Eric if ((prop = zpool_name_to_prop( 526 8525 Eric nvpair_name(elem))) == ZPROP_INVAL) 527 8525 Eric return (EINVAL); 528 8525 Eric 529 8525 Eric if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT) 530 8525 Eric continue; 531 8525 Eric 532 8525 Eric need_sync = B_TRUE; 533 8525 Eric break; 534 8525 Eric } 535 8525 Eric 536 8525 Eric if (need_sync) 537 8525 Eric return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 538 8525 Eric spa, nvp, 3)); 539 8525 Eric else 540 8525 Eric return (0); 541 5094 lling } 542 5094 lling 543 5094 lling /* 544 5094 lling * If the bootfs property value is dsobj, clear it. 545 5094 lling */ 546 5094 lling void 547 5094 lling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 548 5094 lling { 549 5094 lling if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 550 5094 lling VERIFY(zap_remove(spa->spa_meta_objset, 551 5094 lling spa->spa_pool_props_object, 552 5094 lling zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 553 5094 lling spa->spa_bootfs = 0; 554 5094 lling } 555 5094 lling } 556 789 ahrens 557 789 ahrens /* 558 789 ahrens * ========================================================================== 559 789 ahrens * SPA state manipulation (open/create/destroy/import/export) 560 789 ahrens * ========================================================================== 561 789 ahrens */ 562 789 ahrens 563 1544 eschrock static int 564 1544 eschrock spa_error_entry_compare(const void *a, const void *b) 565 1544 eschrock { 566 1544 eschrock spa_error_entry_t *sa = (spa_error_entry_t *)a; 567 1544 eschrock spa_error_entry_t *sb = (spa_error_entry_t *)b; 568 1544 eschrock int ret; 569 1544 eschrock 570 1544 eschrock ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 571 1544 eschrock sizeof (zbookmark_t)); 572 1544 eschrock 573 1544 eschrock if (ret < 0) 574 1544 eschrock return (-1); 575 1544 eschrock else if (ret > 0) 576 1544 eschrock return (1); 577 1544 eschrock else 578 1544 eschrock return (0); 579 1544 eschrock } 580 1544 eschrock 581 1544 eschrock /* 582 1544 eschrock * Utility function which retrieves copies of the current logs and 583 1544 eschrock * re-initializes them in the process. 584 1544 eschrock */ 585 1544 eschrock void 586 1544 eschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 587 1544 eschrock { 588 1544 eschrock ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 589 1544 eschrock 590 1544 eschrock bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 591 1544 eschrock bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 592 1544 eschrock 593 1544 eschrock avl_create(&spa->spa_errlist_scrub, 594 1544 eschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 595 1544 eschrock offsetof(spa_error_entry_t, se_avl)); 596 1544 eschrock avl_create(&spa->spa_errlist_last, 597 1544 eschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 598 1544 eschrock offsetof(spa_error_entry_t, se_avl)); 599 1544 eschrock } 600 1544 eschrock 601 11173 Jonathan static taskq_t * 602 11173 Jonathan spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, 603 11173 Jonathan uint_t value) 604 11173 Jonathan { 605 11173 Jonathan uint_t flags = TASKQ_PREPOPULATE; 606 11173 Jonathan boolean_t batch = B_FALSE; 607 11173 Jonathan 608 11173 Jonathan switch (mode) { 609 11173 Jonathan case zti_mode_null: 610 11173 Jonathan return (NULL); /* no taskq needed */ 611 11173 Jonathan 612 11173 Jonathan case zti_mode_fixed: 613 11173 Jonathan ASSERT3U(value, >=, 1); 614 11173 Jonathan value = MAX(value, 1); 615 11173 Jonathan break; 616 11173 Jonathan 617 11173 Jonathan case zti_mode_batch: 618 11173 Jonathan batch = B_TRUE; 619 11173 Jonathan flags |= TASKQ_THREADS_CPU_PCT; 620 11173 Jonathan value = zio_taskq_batch_pct; 621 11173 Jonathan break; 622 11173 Jonathan 623 11173 Jonathan case zti_mode_online_percent: 624 11173 Jonathan flags |= TASKQ_THREADS_CPU_PCT; 625 11173 Jonathan break; 626 11173 Jonathan 627 11173 Jonathan default: 628 11173 Jonathan panic("unrecognized mode for %s taskq (%u:%u) in " 629 11173 Jonathan "spa_activate()", 630 11173 Jonathan name, mode, value); 631 11173 Jonathan break; 632 11173 Jonathan } 633 11173 Jonathan 634 11173 Jonathan if (zio_taskq_sysdc && spa->spa_proc != &p0) { 635 11173 Jonathan if (batch) 636 11173 Jonathan flags |= TASKQ_DC_BATCH; 637 11173 Jonathan 638 11173 Jonathan return (taskq_create_sysdc(name, value, 50, INT_MAX, 639 11173 Jonathan spa->spa_proc, zio_taskq_basedc, flags)); 640 11173 Jonathan } 641 11173 Jonathan return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, 642 11173 Jonathan spa->spa_proc, flags)); 643 11173 Jonathan } 644 11173 Jonathan 645 11173 Jonathan static void 646 11173 Jonathan spa_create_zio_taskqs(spa_t *spa) 647 11173 Jonathan { 648 7754 Jeff for (int t = 0; t < ZIO_TYPES; t++) { 649 7754 Jeff for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 650 11146 George const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 651 11146 George enum zti_modes mode = ztip->zti_mode; 652 11146 George uint_t value = ztip->zti_value; 653 9515 Jonathan char name[32]; 654 9515 Jonathan 655 9515 Jonathan (void) snprintf(name, sizeof (name), 656 11146 George "%s_%s", zio_type_name[t], zio_taskq_types[q]); 657 9515 Jonathan 658 11173 Jonathan spa->spa_zio_taskq[t][q] = 659 11173 Jonathan spa_taskq_create(spa, name, mode, value); 660 11173 Jonathan } 661 11173 Jonathan } 662 11173 Jonathan } 663 11173 Jonathan 664 11173 Jonathan #ifdef _KERNEL 665 11173 Jonathan static void 666 11173 Jonathan spa_thread(void *arg) 667 11173 Jonathan { 668 11173 Jonathan callb_cpr_t cprinfo; 669 11173 Jonathan 670 11173 Jonathan spa_t *spa = arg; 671 11173 Jonathan user_t *pu = PTOU(curproc); 672 11173 Jonathan 673 11173 Jonathan CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, 674 11173 Jonathan spa->spa_name); 675 11173 Jonathan 676 11173 Jonathan ASSERT(curproc != &p0); 677 11173 Jonathan (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), 678 11173 Jonathan "zpool-%s", spa->spa_name); 679 11173 Jonathan (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); 680 11173 Jonathan 681 11173 Jonathan /* bind this thread to the requested psrset */ 682 11173 Jonathan if (zio_taskq_psrset_bind != PS_NONE) { 683 11173 Jonathan pool_lock(); 684 11173 Jonathan mutex_enter(&cpu_lock); 685 11173 Jonathan mutex_enter(&pidlock); 686 11173 Jonathan mutex_enter(&curproc->p_lock); 687 11173 Jonathan 688 11173 Jonathan if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 689 11173 Jonathan 0, NULL, NULL) == 0) { 690 11173 Jonathan curthread->t_bind_pset = zio_taskq_psrset_bind; 691 11173 Jonathan } else { 692 11173 Jonathan cmn_err(CE_WARN, 693 11173 Jonathan "Couldn't bind process for zfs pool \"%s\" to " 694 11173 Jonathan "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); 695 11173 Jonathan } 696 11173 Jonathan 697 11173 Jonathan mutex_exit(&curproc->p_lock); 698 11173 Jonathan mutex_exit(&pidlock); 699 11173 Jonathan mutex_exit(&cpu_lock); 700 11173 Jonathan pool_unlock(); 701 11173 Jonathan } 702 11173 Jonathan 703 11173 Jonathan if (zio_taskq_sysdc) { 704 11173 Jonathan sysdc_thread_enter(curthread, 100, 0); 705 11173 Jonathan } 706 11173 Jonathan 707 11173 Jonathan spa->spa_proc = curproc; 708 11173 Jonathan spa->spa_did = curthread->t_did; 709 11173 Jonathan 710 11173 Jonathan spa_create_zio_taskqs(spa); 711 11173 Jonathan 712 11173 Jonathan mutex_enter(&spa->spa_proc_lock); 713 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); 714 11173 Jonathan 715 11173 Jonathan spa->spa_proc_state = SPA_PROC_ACTIVE; 716 11173 Jonathan cv_broadcast(&spa->spa_proc_cv); 717 11173 Jonathan 718 11173 Jonathan CALLB_CPR_SAFE_BEGIN(&cprinfo); 719 11173 Jonathan while (spa->spa_proc_state == SPA_PROC_ACTIVE) 720 11173 Jonathan cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 721 11173 Jonathan CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); 722 11173 Jonathan 723 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); 724 11173 Jonathan spa->spa_proc_state = SPA_PROC_GONE; 725 11173 Jonathan spa->spa_proc = &p0; 726 11173 Jonathan cv_broadcast(&spa->spa_proc_cv); 727 11173 Jonathan CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ 728 11173 Jonathan 729 11173 Jonathan mutex_enter(&curproc->p_lock); 730 11173 Jonathan lwp_exit(); 731 11173 Jonathan } 732 11173 Jonathan #endif 733 11173 Jonathan 734 11173 Jonathan /* 735 11173 Jonathan * Activate an uninitialized pool. 736 11173 Jonathan */ 737 11173 Jonathan static void 738 11173 Jonathan spa_activate(spa_t *spa, int mode) 739 11173 Jonathan { 740 11173 Jonathan ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 741 11173 Jonathan 742 11173 Jonathan spa->spa_state = POOL_STATE_ACTIVE; 743 11173 Jonathan spa->spa_mode = mode; 744 11173 Jonathan 745 11173 Jonathan spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 746 11173 Jonathan spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 747 11173 Jonathan 748 11173 Jonathan /* Try to create a covering process */ 749 11173 Jonathan mutex_enter(&spa->spa_proc_lock); 750 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_NONE); 751 11173 Jonathan ASSERT(spa->spa_proc == &p0); 752 11173 Jonathan spa->spa_did = 0; 753 11173 Jonathan 754 11173 Jonathan /* Only create a process if we're going to be around a while. */ 755 11173 Jonathan if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { 756 11173 Jonathan if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, 757 11173 Jonathan NULL, 0) == 0) { 758 11173 Jonathan spa->spa_proc_state = SPA_PROC_CREATED; 759 11173 Jonathan while (spa->spa_proc_state == SPA_PROC_CREATED) { 760 11173 Jonathan cv_wait(&spa->spa_proc_cv, 761 11173 Jonathan &spa->spa_proc_lock); 762 11173 Jonathan } 763 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 764 11173 Jonathan ASSERT(spa->spa_proc != &p0); 765 11173 Jonathan ASSERT(spa->spa_did != 0); 766 11173 Jonathan } else { 767 11173 Jonathan #ifdef _KERNEL 768 11173 Jonathan cmn_err(CE_WARN, 769 11173 Jonathan "Couldn't create process for zfs pool \"%s\"\n", 770 11173 Jonathan spa->spa_name); 771 11173 Jonathan #endif 772 11173 Jonathan } 773 11173 Jonathan } 774 11173 Jonathan mutex_exit(&spa->spa_proc_lock); 775 11173 Jonathan 776 11173 Jonathan /* If we didn't create a process, we need to create our taskqs. */ 777 11173 Jonathan if (spa->spa_proc == &p0) { 778 11173 Jonathan spa_create_zio_taskqs(spa); 779 7754 Jeff } 780 7754 Jeff 781 7754 Jeff list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 782 7754 Jeff offsetof(vdev_t, vdev_config_dirty_node)); 783 7754 Jeff list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 784 7754 Jeff offsetof(vdev_t, vdev_state_dirty_node)); 785 789 ahrens 786 789 ahrens txg_list_create(&spa->spa_vdev_txg_list, 787 789 ahrens offsetof(struct vdev, vdev_txg_node)); 788 1544 eschrock 789 1544 eschrock avl_create(&spa->spa_errlist_scrub, 790 1544 eschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 791 1544 eschrock offsetof(spa_error_entry_t, se_avl)); 792 1544 eschrock avl_create(&spa->spa_errlist_last, 793 1544 eschrock spa_error_entry_compare, sizeof (spa_error_entry_t), 794 1544 eschrock offsetof(spa_error_entry_t, se_avl)); 795 789 ahrens } 796 789 ahrens 797 789 ahrens /* 798 789 ahrens * Opposite of spa_activate(). 799 789 ahrens */ 800 789 ahrens static void 801 789 ahrens spa_deactivate(spa_t *spa) 802 789 ahrens { 803 789 ahrens ASSERT(spa->spa_sync_on == B_FALSE); 804 789 ahrens ASSERT(spa->spa_dsl_pool == NULL); 805 789 ahrens ASSERT(spa->spa_root_vdev == NULL); 806 9630 Jeff ASSERT(spa->spa_async_zio_root == NULL); 807 789 ahrens ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 808 789 ahrens 809 789 ahrens txg_list_destroy(&spa->spa_vdev_txg_list); 810 789 ahrens 811 7754 Jeff list_destroy(&spa->spa_config_dirty_list); 812 7754 Jeff list_destroy(&spa->spa_state_dirty_list); 813 7754 Jeff 814 7754 Jeff for (int t = 0; t < ZIO_TYPES; t++) { 815 7754 Jeff for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 816 11146 George if (spa->spa_zio_taskq[t][q] != NULL) 817 11146 George taskq_destroy(spa->spa_zio_taskq[t][q]); 818 7754 Jeff spa->spa_zio_taskq[t][q] = NULL; 819 7754 Jeff } 820 789 ahrens } 821 789 ahrens 822 789 ahrens metaslab_class_destroy(spa->spa_normal_class); 823 789 ahrens spa->spa_normal_class = NULL; 824 4527 perrin 825 4527 perrin metaslab_class_destroy(spa->spa_log_class); 826 4527 perrin spa->spa_log_class = NULL; 827 1544 eschrock 828 1544 eschrock /* 829 1544 eschrock * If this was part of an import or the open otherwise failed, we may 830 1544 eschrock * still have errors left in the queues. Empty them just in case. 831 1544 eschrock */ 832 1544 eschrock spa_errlog_drain(spa); 833 1544 eschrock 834 1544 eschrock avl_destroy(&spa->spa_errlist_scrub); 835 1544 eschrock avl_destroy(&spa->spa_errlist_last); 836 789 ahrens 837 789 ahrens spa->spa_state = POOL_STATE_UNINITIALIZED; 838 11173 Jonathan 839 11173 Jonathan mutex_enter(&spa->spa_proc_lock); 840 11173 Jonathan if (spa->spa_proc_state != SPA_PROC_NONE) { 841 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 842 11173 Jonathan spa->spa_proc_state = SPA_PROC_DEACTIVATE; 843 11173 Jonathan cv_broadcast(&spa->spa_proc_cv); 844 11173 Jonathan while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { 845 11173 Jonathan ASSERT(spa->spa_proc != &p0); 846 11173 Jonathan cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 847 11173 Jonathan } 848 11173 Jonathan ASSERT(spa->spa_proc_state == SPA_PROC_GONE); 849 11173 Jonathan spa->spa_proc_state = SPA_PROC_NONE; 850 11173 Jonathan } 851 11173 Jonathan ASSERT(spa->spa_proc == &p0); 852 11173 Jonathan mutex_exit(&spa->spa_proc_lock); 853 11173 Jonathan 854 11173 Jonathan /* 855 11173 Jonathan * We want to make sure spa_thread() has actually exited the ZFS 856 11173 Jonathan * module, so that the module can't be unloaded out from underneath 857 11173 Jonathan * it. 858 11173 Jonathan */ 859 11173 Jonathan if (spa->spa_did != 0) { 860 11173 Jonathan thread_join(spa->spa_did); 861 11173 Jonathan spa->spa_did = 0; 862 11173 Jonathan } 863 789 ahrens } 864 789 ahrens 865 789 ahrens /* 866 789 ahrens * Verify a pool configuration, and construct the vdev tree appropriately. This 867 789 ahrens * will create all the necessary vdevs in the appropriate layout, with each vdev 868 789 ahrens * in the CLOSED state. This will prep the pool before open/creation/import. 869 789 ahrens * All vdev validation is done by the vdev_alloc() routine. 870 789 ahrens */ 871 2082 eschrock static int 872 2082 eschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 873 2082 eschrock uint_t id, int atype) 874 789 ahrens { 875 789 ahrens nvlist_t **child; 876 9816 George uint_t children; 877 2082 eschrock int error; 878 789 ahrens 879 2082 eschrock if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 880 2082 eschrock return (error); 881 789 ahrens 882 2082 eschrock if ((*vdp)->vdev_ops->vdev_op_leaf) 883 2082 eschrock return (0); 884 789 ahrens 885 7754 Jeff error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 886 7754 Jeff &child, &children); 887 7754 Jeff 888 7754 Jeff if (error == ENOENT) 889 7754 Jeff return (0); 890 7754 Jeff 891 7754 Jeff if (error) { 892 2082 eschrock vdev_free(*vdp); 893 2082 eschrock *vdp = NULL; 894 2082 eschrock return (EINVAL); 895 789 ahrens } 896 789 ahrens 897 9816 George for (int c = 0; c < children; c++) { 898 2082 eschrock vdev_t *vd; 899 2082 eschrock if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 900 2082 eschrock atype)) != 0) { 901 2082 eschrock vdev_free(*vdp); 902 2082 eschrock *vdp = NULL; 903 2082 eschrock return (error); 904 789 ahrens } 905 789 ahrens } 906 789 ahrens 907 2082 eschrock ASSERT(*vdp != NULL); 908 2082 eschrock 909 2082 eschrock return (0); 910 789 ahrens } 911 789 ahrens 912 789 ahrens /* 913 789 ahrens * Opposite of spa_load(). 914 789 ahrens */ 915 789 ahrens static void 916 789 ahrens spa_unload(spa_t *spa) 917 789 ahrens { 918 2082 eschrock int i; 919 2082 eschrock 920 7754 Jeff ASSERT(MUTEX_HELD(&spa_namespace_lock)); 921 7754 Jeff 922 789 ahrens /* 923 1544 eschrock * Stop async tasks. 924 1544 eschrock */ 925 1544 eschrock spa_async_suspend(spa); 926 1544 eschrock 927 1544 eschrock /* 928 789 ahrens * Stop syncing. 929 789 ahrens */ 930 789 ahrens if (spa->spa_sync_on) { 931 789 ahrens txg_sync_stop(spa->spa_dsl_pool); 932 789 ahrens spa->spa_sync_on = B_FALSE; 933 789 ahrens } 934 789 ahrens 935 789 ahrens /* 936 7754 Jeff * Wait for any outstanding async I/O to complete. 937 7754 Jeff */ 938 9234 George if (spa->spa_async_zio_root != NULL) { 939 9234 George (void) zio_wait(spa->spa_async_zio_root); 940 9234 George spa->spa_async_zio_root = NULL; 941 9234 George } 942 789 ahrens 943 789 ahrens /* 944 789 ahrens * Close the dsl pool. 945 789 ahrens */ 946 789 ahrens if (spa->spa_dsl_pool) { 947 789 ahrens dsl_pool_close(spa->spa_dsl_pool); 948 789 ahrens spa->spa_dsl_pool = NULL; 949 789 ahrens } 950 10922 Jeff 951 10922 Jeff ddt_unload(spa); 952 8241 Jeff 953 8241 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 954 8241 Jeff 955 8241 Jeff /* 956 8241 Jeff * Drop and purge level 2 cache 957 8241 Jeff */ 958 8241 Jeff spa_l2cache_drop(spa); 959 789 ahrens 960 789 ahrens /* 961 789 ahrens * Close all vdevs. 962 789 ahrens */ 963 1585 bonwick if (spa->spa_root_vdev) 964 789 ahrens vdev_free(spa->spa_root_vdev); 965 1585 bonwick ASSERT(spa->spa_root_vdev == NULL); 966 1544 eschrock 967 5450 brendan for (i = 0; i < spa->spa_spares.sav_count; i++) 968 5450 brendan vdev_free(spa->spa_spares.sav_vdevs[i]); 969 5450 brendan if (spa->spa_spares.sav_vdevs) { 970 5450 brendan kmem_free(spa->spa_spares.sav_vdevs, 971 5450 brendan spa->spa_spares.sav_count * sizeof (void *)); 972 5450 brendan spa->spa_spares.sav_vdevs = NULL; 973 5450 brendan } 974 5450 brendan if (spa->spa_spares.sav_config) { 975 5450 brendan nvlist_free(spa->spa_spares.sav_config); 976 5450 brendan spa->spa_spares.sav_config = NULL; 977 5450 brendan } 978 7377 Eric spa->spa_spares.sav_count = 0; 979 5450 brendan 980 5450 brendan for (i = 0; i < spa->spa_l2cache.sav_count; i++) 981 5450 brendan vdev_free(spa->spa_l2cache.sav_vdevs[i]); 982 5450 brendan if (spa->spa_l2cache.sav_vdevs) { 983 5450 brendan kmem_free(spa->spa_l2cache.sav_vdevs, 984 5450 brendan spa->spa_l2cache.sav_count * sizeof (void *)); 985 5450 brendan spa->spa_l2cache.sav_vdevs = NULL; 986 5450 brendan } 987 5450 brendan if (spa->spa_l2cache.sav_config) { 988 5450 brendan nvlist_free(spa->spa_l2cache.sav_config); 989 5450 brendan spa->spa_l2cache.sav_config = NULL; 990 2082 eschrock } 991 7377 Eric spa->spa_l2cache.sav_count = 0; 992 2082 eschrock 993 1544 eschrock spa->spa_async_suspended = 0; 994 8241 Jeff 995 8241 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 996 2082 eschrock } 997 2082 eschrock 998 2082 eschrock /* 999 2082 eschrock * Load (or re-load) the current list of vdevs describing the active spares for 1000 2082 eschrock * this pool. When this is called, we have some form of basic information in 1001 5450 brendan * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 1002 5450 brendan * then re-generate a more complete list including status information. 1003 2082 eschrock */ 1004 2082 eschrock static void 1005 2082 eschrock spa_load_spares(spa_t *spa) 1006 2082 eschrock { 1007 2082 eschrock nvlist_t **spares; 1008 2082 eschrock uint_t nspares; 1009 2082 eschrock int i; 1010 3377 eschrock vdev_t *vd, *tvd; 1011 7754 Jeff 1012 7754 Jeff ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1013 2082 eschrock 1014 2082 eschrock /* 1015 2082 eschrock * First, close and free any existing spare vdevs. 1016 2082 eschrock */ 1017 5450 brendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 1018 5450 brendan vd = spa->spa_spares.sav_vdevs[i]; 1019 3377 eschrock 1020 3377 eschrock /* Undo the call to spa_activate() below */ 1021 6643 eschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1022 6643 eschrock B_FALSE)) != NULL && tvd->vdev_isspare) 1023 3377 eschrock spa_spare_remove(tvd); 1024 3377 eschrock vdev_close(vd); 1025 3377 eschrock vdev_free(vd); 1026 2082 eschrock } 1027 3377 eschrock 1028 5450 brendan if (spa->spa_spares.sav_vdevs) 1029 5450 brendan kmem_free(spa->spa_spares.sav_vdevs, 1030 5450 brendan spa->spa_spares.sav_count * sizeof (void *)); 1031 5450 brendan 1032 5450 brendan if (spa->spa_spares.sav_config == NULL) 1033 2082 eschrock nspares = 0; 1034 2082 eschrock else 1035 5450 brendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 1036 2082 eschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 1037 2082 eschrock 1038 5450 brendan spa->spa_spares.sav_count = (int)nspares; 1039 5450 brendan spa->spa_spares.sav_vdevs = NULL; 1040 2082 eschrock 1041 2082 eschrock if (nspares == 0) 1042 2082 eschrock return; 1043 2082 eschrock 1044 2082 eschrock /* 1045 2082 eschrock * Construct the array of vdevs, opening them to get status in the 1046 3377 eschrock * process. For each spare, there is potentially two different vdev_t 1047 3377 eschrock * structures associated with it: one in the list of spares (used only 1048 3377 eschrock * for basic validation purposes) and one in the active vdev 1049 3377 eschrock * configuration (if it's spared in). During this phase we open and 1050 3377 eschrock * validate each vdev on the spare list. If the vdev also exists in the 1051 3377 eschrock * active configuration, then we also mark this vdev as an active spare. 1052 2082 eschrock */ 1053 5450 brendan spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 1054 5450 brendan KM_SLEEP); 1055 5450 brendan for (i = 0; i < spa->spa_spares.sav_count; i++) { 1056 2082 eschrock VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 1057 2082 eschrock VDEV_ALLOC_SPARE) == 0); 1058 2082 eschrock ASSERT(vd != NULL); 1059 2082 eschrock 1060 5450 brendan spa->spa_spares.sav_vdevs[i] = vd; 1061 3377 eschrock 1062 6643 eschrock if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1063 6643 eschrock B_FALSE)) != NULL) { 1064 3377 eschrock if (!tvd->vdev_isspare) 1065 3377 eschrock spa_spare_add(tvd); 1066 3377 eschrock 1067 3377 eschrock /* 1068 3377 eschrock * We only mark the spare active if we were successfully 1069 3377 eschrock * able to load the vdev. Otherwise, importing a pool 1070 3377 eschrock * with a bad active spare would result in strange 1071 3377 eschrock * behavior, because multiple pool would think the spare 1072 3377 eschrock * is actively in use. 1073 3377 eschrock * 1074 3377 eschrock * There is a vulnerability here to an equally bizarre 1075 3377 eschrock * circumstance, where a dead active spare is later 1076 3377 eschrock * brought back to life (onlined or otherwise). Given 1077 3377 eschrock * the rarity of this scenario, and the extra complexity 1078 3377 eschrock * it adds, we ignore the possibility. 1079 3377 eschrock */ 1080 3377 eschrock if (!vdev_is_dead(tvd)) 1081 3377 eschrock spa_spare_activate(tvd); 1082 3377 eschrock } 1083 2082 eschrock 1084 7754 Jeff vd->vdev_top = vd; 1085 9425 Eric vd->vdev_aux = &spa->spa_spares; 1086 7754 Jeff 1087 2082 eschrock if (vdev_open(vd) != 0) 1088 2082 eschrock continue; 1089 2082 eschrock 1090 5450 brendan if (vdev_validate_aux(vd) == 0) 1091 5450 brendan spa_spare_add(vd); 1092 2082 eschrock } 1093 2082 eschrock 1094 2082 eschrock /* 1095 2082 eschrock * Recompute the stashed list of spares, with status information 1096 2082 eschrock * this time. 1097 2082 eschrock */ 1098 5450 brendan VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 1099 2082 eschrock DATA_TYPE_NVLIST_ARRAY) == 0); 1100 2082 eschrock 1101 5450 brendan spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 1102 5450 brendan KM_SLEEP); 1103 5450 brendan for (i = 0; i < spa->spa_spares.sav_count; i++) 1104 5450 brendan spares[i] = vdev_config_generate(spa, 1105 5450 brendan spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 1106 5450 brendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 1107 5450 brendan ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 1108 5450 brendan for (i = 0; i < spa->spa_spares.sav_count; i++) 1109 2082 eschrock nvlist_free(spares[i]); 1110 5450 brendan kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 1111 5450 brendan } 1112 5450 brendan 1113 5450 brendan /* 1114 5450 brendan * Load (or re-load) the current list of vdevs describing the active l2cache for 1115 5450 brendan * this pool. When this is called, we have some form of basic information in 1116 5450 brendan * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 1117 5450 brendan * then re-generate a more complete list including status information. 1118 5450 brendan * Devices which are already active have their details maintained, and are 1119 5450 brendan * not re-opened. 1120 5450 brendan */ 1121 5450 brendan static void 1122 5450 brendan spa_load_l2cache(spa_t *spa) 1123 5450 brendan { 1124 5450 brendan nvlist_t **l2cache; 1125 5450 brendan uint_t nl2cache; 1126 5450 brendan int i, j, oldnvdevs; 1127 9816 George uint64_t guid; 1128 5450 brendan vdev_t *vd, **oldvdevs, **newvdevs; 1129 5450 brendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 1130 5450 brendan 1131 7754 Jeff ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1132 7754 Jeff 1133 5450 brendan if (sav->sav_config != NULL) { 1134 5450 brendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 1135 5450 brendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1136 5450 brendan newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 1137 5450 brendan } else { 1138 5450 brendan nl2cache = 0; 1139 5450 brendan } 1140 5450 brendan 1141 5450 brendan oldvdevs = sav->sav_vdevs; 1142 5450 brendan oldnvdevs = sav->sav_count; 1143 5450 brendan sav->sav_vdevs = NULL; 1144 5450 brendan sav->sav_count = 0; 1145 5450 brendan 1146 5450 brendan /* 1147 5450 brendan * Process new nvlist of vdevs. 1148 5450 brendan */ 1149 5450 brendan for (i = 0; i < nl2cache; i++) { 1150 5450 brendan VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 1151 5450 brendan &guid) == 0); 1152 5450 brendan 1153 5450 brendan newvdevs[i] = NULL; 1154 5450 brendan for (j = 0; j < oldnvdevs; j++) { 1155 5450 brendan vd = oldvdevs[j]; 1156 5450 brendan if (vd != NULL && guid == vd->vdev_guid) { 1157 5450 brendan /* 1158 5450 brendan * Retain previous vdev for add/remove ops. 1159 5450 brendan */ 1160 5450 brendan newvdevs[i] = vd; 1161 5450 brendan oldvdevs[j] = NULL; 1162 5450 brendan break; 1163 5450 brendan } 1164 5450 brendan } 1165 5450 brendan 1166 5450 brendan if (newvdevs[i] == NULL) { 1167 5450 brendan /* 1168 5450 brendan * Create new vdev 1169 5450 brendan */ 1170 5450 brendan VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 1171 5450 brendan VDEV_ALLOC_L2CACHE) == 0); 1172 5450 brendan ASSERT(vd != NULL); 1173 5450 brendan newvdevs[i] = vd; 1174 5450 brendan 1175 5450 brendan /* 1176 5450 brendan * Commit this vdev as an l2cache device, 1177 5450 brendan * even if it fails to open. 1178 5450 brendan */ 1179 5450 brendan spa_l2cache_add(vd); 1180 5450 brendan 1181 6643 eschrock vd->vdev_top = vd; 1182 6643 eschrock vd->vdev_aux = sav; 1183 6643 eschrock 1184 6643 eschrock spa_l2cache_activate(vd); 1185 6643 eschrock 1186 5450 brendan if (vdev_open(vd) != 0) 1187 5450 brendan continue; 1188 5450 brendan 1189 5450 brendan (void) vdev_validate_aux(vd); 1190 5450 brendan 1191 9816 George if (!vdev_is_dead(vd)) 1192 9816 George l2arc_add_vdev(spa, vd); 1193 5450 brendan } 1194 5450 brendan } 1195 5450 brendan 1196 5450 brendan /* 1197 5450 brendan * Purge vdevs that were dropped 1198 5450 brendan */ 1199 5450 brendan for (i = 0; i < oldnvdevs; i++) { 1200 5450 brendan uint64_t pool; 1201 5450 brendan 1202 5450 brendan vd = oldvdevs[i]; 1203 5450 brendan if (vd != NULL) { 1204 8241 Jeff if (spa_l2cache_exists(vd->vdev_guid, &pool) && 1205 8241 Jeff pool != 0ULL && l2arc_vdev_present(vd)) 1206 5450 brendan l2arc_remove_vdev(vd); 1207 5450 brendan (void) vdev_close(vd); 1208 5450 brendan spa_l2cache_remove(vd); 1209 5450 brendan } 1210 5450 brendan } 1211 5450 brendan 1212 5450 brendan if (oldvdevs) 1213 5450 brendan kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 1214 5450 brendan 1215 5450 brendan if (sav->sav_config == NULL) 1216 5450 brendan goto out; 1217 5450 brendan 1218 5450 brendan sav->sav_vdevs = newvdevs; 1219 5450 brendan sav->sav_count = (int)nl2cache; 1220 5450 brendan 1221 5450 brendan /* 1222 5450 brendan * Recompute the stashed list of l2cache devices, with status 1223 5450 brendan * information this time. 1224 5450 brendan */ 1225 5450 brendan VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 1226 5450 brendan DATA_TYPE_NVLIST_ARRAY) == 0); 1227 5450 brendan 1228 5450 brendan l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 1229 5450 brendan for (i = 0; i < sav->sav_count; i++) 1230 5450 brendan l2cache[i] = vdev_config_generate(spa, 1231 5450 brendan sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 1232 5450 brendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1233 5450 brendan ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 1234 5450 brendan out: 1235 5450 brendan for (i = 0; i < sav->sav_count; i++) 1236 5450 brendan nvlist_free(l2cache[i]); 1237 5450 brendan if (sav->sav_count) 1238 5450 brendan kmem_free(l2cache, sav->sav_count * sizeof (void *)); 1239 2082 eschrock } 1240 2082 eschrock 1241 2082 eschrock static int 1242 2082 eschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 1243 2082 eschrock { 1244 2082 eschrock dmu_buf_t *db; 1245 2082 eschrock char *packed = NULL; 1246 2082 eschrock size_t nvsize = 0; 1247 2082 eschrock int error; 1248 2082 eschrock *value = NULL; 1249 2082 eschrock 1250 2082 eschrock VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 1251 2082 eschrock nvsize = *(uint64_t *)db->db_data; 1252 2082 eschrock dmu_buf_rele(db, FTAG); 1253 2082 eschrock 1254 2082 eschrock packed = kmem_alloc(nvsize, KM_SLEEP); 1255 9512 Neil error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 1256 9512 Neil DMU_READ_PREFETCH); 1257 2082 eschrock if (error == 0) 1258 2082 eschrock error = nvlist_unpack(packed, nvsize, value, 0); 1259 2082 eschrock kmem_free(packed, nvsize); 1260 2082 eschrock 1261 2082 eschrock return (error); 1262 789 ahrens } 1263 789 ahrens 1264 789 ahrens /* 1265 4451 eschrock * Checks to see if the given vdev could not be opened, in which case we post a 1266 4451 eschrock * sysevent to notify the autoreplace code that the device has been removed. 1267 4451 eschrock */ 1268 4451 eschrock static void 1269 4451 eschrock spa_check_removed(vdev_t *vd) 1270 4451 eschrock { 1271 9816 George for (int c = 0; c < vd->vdev_children; c++) 1272 4451 eschrock spa_check_removed(vd->vdev_child[c]); 1273 4451 eschrock 1274 4451 eschrock if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 1275 4451 eschrock zfs_post_autoreplace(vd->vdev_spa, vd); 1276 4451 eschrock spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 1277 4451 eschrock } 1278 4451 eschrock } 1279 4451 eschrock 1280 4451 eschrock /* 1281 9701 George * Load the slog device state from the config object since it's possible 1282 9701 George * that the label does not contain the most up-to-date information. 1283 9701 George */ 1284 9701 George void 1285 10594 George spa_load_log_state(spa_t *spa, nvlist_t *nv) 1286 10594 George { 1287 10594 George vdev_t *ovd, *rvd = spa->spa_root_vdev; 1288 10594 George 1289 10594 George /* 1290 10594 George * Load the original root vdev tree from the passed config. 1291 10594 George */ 1292 10594 George spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1293 10594 George VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 1294 10594 George 1295 10594 George for (int c = 0; c < rvd->vdev_children; c++) { 1296 10594 George vdev_t *cvd = rvd->vdev_child[c]; 1297 10594 George if (cvd->vdev_islog) 1298 10594 George vdev_load_log_state(cvd, ovd->vdev_child[c]); 1299 10594 George } 1300 10594 George vdev_free(ovd); 1301 10594 George spa_config_exit(spa, SCL_ALL, FTAG); 1302 9701 George } 1303 9701 George 1304 9701 George /* 1305 7294 perrin * Check for missing log devices 1306 7294 perrin */ 1307 7294 perrin int 1308 7294 perrin spa_check_logs(spa_t *spa) 1309 7294 perrin { 1310 7294 perrin switch (spa->spa_log_state) { 1311 7294 perrin case SPA_LOG_MISSING: 1312 7294 perrin /* need to recheck in case slog has been restored */ 1313 7294 perrin case SPA_LOG_UNKNOWN: 1314 7294 perrin if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 1315 7294 perrin DS_FIND_CHILDREN)) { 1316 7294 perrin spa->spa_log_state = SPA_LOG_MISSING; 1317 7294 perrin return (1); 1318 7294 perrin } 1319 7294 perrin break; 1320 9701 George } 1321 7294 perrin return (0); 1322 7294 perrin } 1323 7294 perrin 1324 10672 Eric static void 1325 10672 Eric spa_aux_check_removed(spa_aux_vdev_t *sav) 1326 10672 Eric { 1327 10922 Jeff for (int i = 0; i < sav->sav_count; i++) 1328 10672 Eric spa_check_removed(sav->sav_vdevs[i]); 1329 10922 Jeff } 1330 10922 Jeff 1331 10922 Jeff void 1332 10922 Jeff spa_claim_notify(zio_t *zio) 1333 10922 Jeff { 1334 10922 Jeff spa_t *spa = zio->io_spa; 1335 10922 Jeff 1336 10922 Jeff if (zio->io_error) 1337 10922 Jeff return; 1338 10922 Jeff 1339 10922 Jeff mutex_enter(&spa->spa_props_lock); /* any mutex will do */ 1340 10922 Jeff if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) 1341 10922 Jeff spa->spa_claim_max_txg = zio->io_bp->blk_birth; 1342 10922 Jeff mutex_exit(&spa->spa_props_lock); 1343 10672 Eric } 1344 10672 Eric 1345 10921 Tim typedef struct spa_load_error { 1346 10921 Tim uint64_t sle_metadata_count; 1347 10921 Tim uint64_t sle_data_count; 1348 10921 Tim } spa_load_error_t; 1349 10921 Tim 1350 10921 Tim static void 1351 10921 Tim spa_load_verify_done(zio_t *zio) 1352 10921 Tim { 1353 10921 Tim blkptr_t *bp = zio->io_bp; 1354 10921 Tim spa_load_error_t *sle = zio->io_private; 1355 10921 Tim dmu_object_type_t type = BP_GET_TYPE(bp); 1356 10921 Tim int error = zio->io_error; 1357 10921 Tim 1358 10921 Tim if (error) { 1359 10921 Tim if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && 1360 10921 Tim type != DMU_OT_INTENT_LOG) 1361 10921 Tim atomic_add_64(&sle->sle_metadata_count, 1); 1362 10921 Tim else 1363 10921 Tim atomic_add_64(&sle->sle_data_count, 1); 1364 10921 Tim } 1365 10921 Tim zio_data_buf_free(zio->io_data, zio->io_size); 1366 10921 Tim } 1367 10921 Tim 1368 10921 Tim /*ARGSUSED*/ 1369 10921 Tim static int 1370 10922 Jeff spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 1371 10922 Jeff const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1372 10921 Tim { 1373 10921 Tim if (bp != NULL) { 1374 10921 Tim zio_t *rio = arg; 1375 10921 Tim size_t size = BP_GET_PSIZE(bp); 1376 10921 Tim void *data = zio_data_buf_alloc(size); 1377 10921 Tim 1378 10921 Tim zio_nowait(zio_read(rio, spa, bp, data, size, 1379 10921 Tim spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, 1380 10921 Tim ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | 1381 10921 Tim ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); 1382 10921 Tim } 1383 10921 Tim return (0); 1384 10921 Tim } 1385 10921 Tim 1386 10921 Tim static int 1387 10921 Tim spa_load_verify(spa_t *spa) 1388 10921 Tim { 1389 10921 Tim zio_t *rio; 1390 10921 Tim spa_load_error_t sle = { 0 }; 1391 10921 Tim zpool_rewind_policy_t policy; 1392 10921 Tim boolean_t verify_ok = B_FALSE; 1393 10921 Tim int error; 1394 10921 Tim 1395 10921 Tim rio = zio_root(spa, NULL, &sle, 1396 10921 Tim ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1397 10921 Tim 1398 11125 Jeff error = traverse_pool(spa, spa->spa_verify_min_txg, 1399 11125 Jeff TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); 1400 10921 Tim 1401 10921 Tim (void) zio_wait(rio); 1402 10921 Tim 1403 10921 Tim zpool_get_rewind_policy(spa->spa_config, &policy); 1404 10921 Tim 1405 10921 Tim spa->spa_load_meta_errors = sle.sle_metadata_count; 1406 10921 Tim spa->spa_load_data_errors = sle.sle_data_count; 1407 10921 Tim 1408 10921 Tim if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta && 1409 10921 Tim sle.sle_data_count <= policy.zrp_maxdata) { 1410 10921 Tim verify_ok = B_TRUE; 1411 10921 Tim spa->spa_load_txg = spa->spa_uberblock.ub_txg; 1412 10921 Tim spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; 1413 11026 Tim } else { 1414 11026 Tim spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 1415 10921 Tim } 1416 10921 Tim 1417 10921 Tim if (error) { 1418 10921 Tim if (error != ENXIO && error != EIO) 1419 10921 Tim error = EIO; 1420 10921 Tim return (error); 1421 10921 Tim } 1422 10921 Tim 1423 10921 Tim return (verify_ok ? 0 : EIO); 1424 10921 Tim } 1425 10921 Tim 1426 7294 perrin /* 1427 789 ahrens * Load an existing storage pool, using the pool's builtin spa_config as a 1428 1544 eschrock * source of configuration information. 1429 789 ahrens */ 1430 789 ahrens static int 1431 10921 Tim spa_load(spa_t *spa, spa_load_state_t state, int mosconfig) 1432 789 ahrens { 1433 789 ahrens int error = 0; 1434 10594 George nvlist_t *nvconfig, *nvroot = NULL; 1435 789 ahrens vdev_t *rvd; 1436 789 ahrens uberblock_t *ub = &spa->spa_uberblock; 1437 1635 bonwick uint64_t config_cache_txg = spa->spa_config_txg; 1438 789 ahrens uint64_t pool_guid; 1439 2082 eschrock uint64_t version; 1440 4451 eschrock uint64_t autoreplace = 0; 1441 8241 Jeff int orig_mode = spa->spa_mode; 1442 7294 perrin char *ereport = FM_EREPORT_ZFS_POOL; 1443 10921 Tim nvlist_t *config = spa->spa_config; 1444 8241 Jeff 1445 8241 Jeff /* 1446 8241 Jeff * If this is an untrusted config, access the pool in read-only mode. 1447 8241 Jeff * This prevents things like resilvering recently removed devices. 1448 8241 Jeff */ 1449 8241 Jeff if (!mosconfig) 1450 8241 Jeff spa->spa_mode = FREAD; 1451 7754 Jeff 1452 7754 Jeff ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1453 789 ahrens 1454 1544 eschrock spa->spa_load_state = state; 1455 1635 bonwick 1456 789 ahrens if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1457 1733 bonwick nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1458 1544 eschrock error = EINVAL; 1459 1544 eschrock goto out; 1460 1544 eschrock } 1461 2082 eschrock 1462 2082 eschrock /* 1463 2082 eschrock * Versioning wasn't explicitly added to the label until later, so if 1464 2082 eschrock * it's not present treat it as the initial version. 1465 2082 eschrock */ 1466 2082 eschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1467 4577 ahrens version = SPA_VERSION_INITIAL; 1468 1733 bonwick 1469 1733 bonwick (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1470 1733 bonwick &spa->spa_config_txg); 1471 789 ahrens 1472 1635 bonwick if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1473 1544 eschrock spa_guid_exists(pool_guid, 0)) { 1474 1544 eschrock error = EEXIST; 1475 1544 eschrock goto out; 1476 1544 eschrock } 1477 2174 eschrock 1478 2174 eschrock spa->spa_load_guid = pool_guid; 1479 9234 George 1480 9234 George /* 1481 9234 George * Create "The Godfather" zio to hold all async IOs 1482 9234 George */ 1483 9630 Jeff spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 1484 9630 Jeff ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 1485 789 ahrens 1486 789 ahrens /* 1487 2082 eschrock * Parse the configuration into a vdev tree. We explicitly set the 1488 2082 eschrock * value that will be returned by spa_version() since parsing the 1489 2082 eschrock * configuration requires knowing the version number. 1490 789 ahrens */ 1491 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1492 2082 eschrock spa->spa_ubsync.ub_version = version; 1493 2082 eschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1494 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 1495 789 ahrens 1496 2082 eschrock if (error != 0) 1497 1544 eschrock goto out; 1498 789 ahrens 1499 1585 bonwick ASSERT(spa->spa_root_vdev == rvd); 1500 789 ahrens ASSERT(spa_guid(spa) == pool_guid); 1501 789 ahrens 1502 789 ahrens /* 1503 789 ahrens * Try to open all vdevs, loading each label in the process. 1504 789 ahrens */ 1505 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1506 4070 mc142369 error = vdev_open(rvd); 1507 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 1508 4070 mc142369 if (error != 0) 1509 1544 eschrock goto out; 1510 789 ahrens 1511 789 ahrens /* 1512 9276 Mark * We need to validate the vdev labels against the configuration that 1513 9276 Mark * we have in hand, which is dependent on the setting of mosconfig. If 1514 9276 Mark * mosconfig is true then we're validating the vdev labels based on 1515 9276 Mark * that config. Otherwise, we're validating against the cached config 1516 9276 Mark * (zpool.cache) that was read when we loaded the zfs module, and then 1517 9276 Mark * later we will recursively call spa_load() and validate against 1518 9276 Mark * the vdev config. 1519 9276 Mark */ 1520 9276 Mark spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1521 9276 Mark error = vdev_validate(rvd); 1522 9276 Mark spa_config_exit(spa, SCL_ALL, FTAG); 1523 9276 Mark if (error != 0) 1524 9276 Mark goto out; 1525 1986 eschrock 1526 1986 eschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1527 1986 eschrock error = ENXIO; 1528 1986 eschrock goto out; 1529 1986 eschrock } 1530 1986 eschrock 1531 1986 eschrock /* 1532 789 ahrens * Find the best uberblock. 1533 789 ahrens */ 1534 7754 Jeff vdev_uberblock_load(NULL, rvd, ub); 1535 789 ahrens 1536 789 ahrens /* 1537 789 ahrens * If we weren't able to find a single valid uberblock, return failure. 1538 789 ahrens */ 1539 789 ahrens if (ub->ub_txg == 0) { 1540 1760 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1541 1760 eschrock VDEV_AUX_CORRUPT_DATA); 1542 1544 eschrock error = ENXIO; 1543 1544 eschrock goto out; 1544 1544 eschrock } 1545 1544 eschrock 1546 1544 eschrock /* 1547 1544 eschrock * If the pool is newer than the code, we can't open it. 1548 1544 eschrock */ 1549 4577 ahrens if (ub->ub_version > SPA_VERSION) { 1550 1760 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1551 1760 eschrock VDEV_AUX_VERSION_NEWER); 1552 1544 eschrock error = ENOTSUP; 1553 1544 eschrock goto out; 1554 789 ahrens } 1555 789 ahrens 1556 789 ahrens /* 1557 789 ahrens * If the vdev guid sum doesn't match the uberblock, we have an 1558 789 ahrens * incomplete configuration. 1559 789 ahrens */ 1560 1732 bonwick if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1561 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1562 1544 eschrock VDEV_AUX_BAD_GUID_SUM); 1563 1544 eschrock error = ENXIO; 1564 1544 eschrock goto out; 1565 789 ahrens } 1566 789 ahrens 1567 789 ahrens /* 1568 789 ahrens * Initialize internal SPA structures. 1569 789 ahrens */ 1570 789 ahrens spa->spa_state = POOL_STATE_ACTIVE; 1571 789 ahrens spa->spa_ubsync = spa->spa_uberblock; 1572 10921 Tim spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 1573 10921 Tim TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE; 1574 10921 Tim spa->spa_first_txg = spa->spa_last_ubsync_txg ? 1575 10921 Tim spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 1576 10922 Jeff spa->spa_claim_max_txg = spa->spa_first_txg; 1577 10922 Jeff 1578 1544 eschrock error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1579 1544 eschrock if (error) { 1580 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1581 1544 eschrock VDEV_AUX_CORRUPT_DATA); 1582 10921 Tim error = EIO; 1583 1544 eschrock goto out; 1584 1544 eschrock } 1585 789 ahrens spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1586 789 ahrens 1587 1544 eschrock if (zap_lookup(spa->spa_meta_objset, 1588 789 ahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1589 1544 eschrock sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1590 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1591 1544 eschrock VDEV_AUX_CORRUPT_DATA); 1592 1544 eschrock error = EIO; 1593 1544 eschrock goto out; 1594 1544 eschrock } 1595 789 ahrens 1596 10594 George if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { 1597 10594 George vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1598 10594 George VDEV_AUX_CORRUPT_DATA); 1599 10594 George error = EIO; 1600 10594 George goto out; 1601 10594 George } 1602 10594 George 1603 789 ahrens if (!mosconfig) { 1604 3975 ek110237 uint64_t hostid; 1605 789 ahrens 1606 10594 George if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 1607 7706 Lin ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 1608 3975 ek110237 char *hostname; 1609 3975 ek110237 unsigned long myhostid = 0; 1610 3975 ek110237 1611 10594 George VERIFY(nvlist_lookup_string(nvconfig, 1612 3975 ek110237 ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 1613 3975 ek110237 1614 8662 Jordan #ifdef _KERNEL 1615 8662 Jordan myhostid = zone_get_hostid(NULL); 1616 8662 Jordan #else /* _KERNEL */ 1617 8662 Jordan /* 1618 8662 Jordan * We're emulating the system's hostid in userland, so 1619 8662 Jordan * we can't use zone_get_hostid(). 1620 8662 Jordan */ 1621 3975 ek110237 (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 1622 8662 Jordan #endif /* _KERNEL */ 1623 4178 lling if (hostid != 0 && myhostid != 0 && 1624 8662 Jordan hostid != myhostid) { 1625 3975 ek110237 cmn_err(CE_WARN, "pool '%s' could not be " 1626 3975 ek110237 "loaded as it was last accessed by " 1627 7706 Lin "another system (host: %s hostid: 0x%lx). " 1628 3975 ek110237 "See: http://www.sun.com/msg/ZFS-8000-EY", 1629 7754 Jeff spa_name(spa), hostname, 1630 3975 ek110237 (unsigned long)hostid); 1631 3975 ek110237 error = EBADF; 1632 3975 ek110237 goto out; 1633 3975 ek110237 } 1634 1544 eschrock } 1635 789 ahrens 1636 10594 George spa_config_set(spa, nvconfig); 1637 789 ahrens spa_unload(spa); 1638 789 ahrens spa_deactivate(spa); 1639 8241 Jeff spa_activate(spa, orig_mode); 1640 789 ahrens 1641 10921 Tim return (spa_load(spa, state, B_TRUE)); 1642 789 ahrens } 1643 789 ahrens 1644 1544 eschrock if (zap_lookup(spa->spa_meta_objset, 1645 789 ahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1646 10922 Jeff sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) { 1647 2082 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1648 2082 eschrock VDEV_AUX_CORRUPT_DATA); 1649 2082 eschrock error = EIO; 1650 2082 eschrock goto out; 1651 2082 eschrock } 1652 2082 eschrock 1653 2082 eschrock /* 1654 2082 eschrock * Load the bit that tells us to use the new accounting function 1655 2082 eschrock * (raid-z deflation). If we have an older pool, this will not 1656 2082 eschrock * be present. 1657 2082 eschrock */ 1658 2082 eschrock error = zap_lookup(spa->spa_meta_objset, 1659 2082 eschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 1660 2082 eschrock sizeof (uint64_t), 1, &spa->spa_deflate); 1661 2082 eschrock if (error != 0 && error != ENOENT) { 1662 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1663 1544 eschrock VDEV_AUX_CORRUPT_DATA); 1664 1544 eschrock error = EIO; 1665 1544 eschrock goto out; 1666 1544 eschrock } 1667 789 ahrens 1668 789 ahrens /* 1669 1544 eschrock * Load the persistent error log. If we have an older pool, this will 1670 1544 eschrock * not be present. 1671 789 ahrens */ 1672 1544 eschrock error = zap_lookup(spa->spa_meta_objset, 1673 1544 eschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1674 1544 eschrock sizeof (uint64_t), 1, &spa->spa_errlog_last); 1675 1807 bonwick if (error != 0 && error != ENOENT) { 1676 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1677 1544 eschrock VDEV_AUX_CORRUPT_DATA); 1678 1544 eschrock error = EIO; 1679 1544 eschrock goto out; 1680 1544 eschrock } 1681 1544 eschrock 1682 1544 eschrock error = zap_lookup(spa->spa_meta_objset, 1683 1544 eschrock DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1684 1544 eschrock sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1685 2926 ek110237 if (error != 0 && error != ENOENT) { 1686 2926 ek110237 vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1687 2926 ek110237 VDEV_AUX_CORRUPT_DATA); 1688 2926 ek110237 error = EIO; 1689 2926 ek110237 goto out; 1690 2926 ek110237 } 1691 2926 ek110237 1692 2926 ek110237 /* 1693 2926 ek110237 * Load the history object. If we have an older pool, this 1694 2926 ek110237 * will not be present. 1695 2926 ek110237 */ 1696 2926 ek110237 error = zap_lookup(spa->spa_meta_objset, 1697 2926 ek110237 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 1698 2926 ek110237 sizeof (uint64_t), 1, &spa->spa_history); 1699 1544 eschrock if (error != 0 && error != ENOENT) { 1700 1544 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1701 1544 eschrock VDEV_AUX_CORRUPT_DATA); 1702 1544 eschrock error = EIO; 1703 1544 eschrock goto out; 1704 2082 eschrock } 1705 2082 eschrock 1706 2082 eschrock /* 1707 2082 eschrock * Load any hot spares for this pool. 1708 2082 eschrock */ 1709 2082 eschrock error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1710 5450 brendan DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 1711 2082 eschrock if (error != 0 && error != ENOENT) { 1712 2082 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1713 2082 eschrock VDEV_AUX_CORRUPT_DATA); 1714 2082 eschrock error = EIO; 1715 2082 eschrock goto out; 1716 2082 eschrock } 1717 2082 eschrock if (error == 0) { 1718 4577 ahrens ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1719 5450 brendan if (load_nvlist(spa, spa->spa_spares.sav_object, 1720 5450 brendan &spa->spa_spares.sav_config) != 0) { 1721 2082 eschrock vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1722 2082 eschrock VDEV_AUX_CORRUPT_DATA); 1723 2082 eschrock error = EIO; 1724 2082 eschrock goto out; 1725 2082 eschrock } 1726 2082 eschrock 1727 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1728 7754 Jeff spa_load_spares(spa); 1729 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 1730 5450 brendan } 1731 5450 brendan 1732 5450 brendan /* 1733 5450 brendan * Load any level 2 ARC devices for this pool. 1734 5450 brendan */ 1735 5450 brendan error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1736 5450 brendan DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1737 5450 brendan &spa->spa_l2cache.sav_object); 1738 5450 brendan if (error != 0 && error != ENOENT) { 1739 5450 brendan vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1740 5450 brendan VDEV_AUX_CORRUPT_DATA); 1741 5450 brendan error = EIO; 1742 5450 brendan goto out; 1743 5450 brendan } 1744 5450 brendan if (error == 0) { 1745 5450 brendan ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1746 5450 brendan if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1747 5450 brendan &spa->spa_l2cache.sav_config) != 0) { 1748 5450 brendan vdev_set_state(rvd, B_TRUE, 1749 5450 brendan VDEV_STATE_CANT_OPEN, 1750 5450 brendan VDEV_AUX_CORRUPT_DATA); 1751 5450 brendan error = EIO; 1752 5450 brendan goto out; 1753 5450 brendan } 1754 5450 brendan 1755 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1756 7754 Jeff spa_load_l2cache(spa); 1757 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 1758 1544 eschrock } 1759 1544 eschrock 1760 5094 lling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1761 4543 marks 1762 3912 lling error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1763 3912 lling DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1764 3912 lling 1765 3912 lling if (error && error != ENOENT) { 1766 3912 lling vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1767 3912 lling VDEV_AUX_CORRUPT_DATA); 1768 3912 lling error = EIO; 1769 3912 lling goto out; 1770 3912 lling } 1771 3912 lling 1772 3912 lling if (error == 0) { 1773 3912 lling (void) zap_lookup(spa->spa_meta_objset, 1774 3912 lling spa->spa_pool_props_object, 1775 4451 eschrock zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1776 3912 lling sizeof (uint64_t), 1, &spa->spa_bootfs); 1777 4451 eschrock (void) zap_lookup(spa->spa_meta_objset, 1778 4451 eschrock spa->spa_pool_props_object, 1779 4451 eschrock zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1780 4451 eschrock sizeof (uint64_t), 1, &autoreplace); 1781 10672 Eric spa->spa_autoreplace = (autoreplace != 0); 1782 4543 marks (void) zap_lookup(spa->spa_meta_objset, 1783 4543 marks spa->spa_pool_props_object, 1784 4543 marks zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1785 4543 marks sizeof (uint64_t), 1, &spa->spa_delegation); 1786 5329 gw25295 (void) zap_lookup(spa->spa_meta_objset, 1787 5329 gw25295 spa->spa_pool_props_object, 1788 5329 gw25295 zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 1789 5329 gw25295 sizeof (uint64_t), 1, &spa->spa_failmode); 1790 9816 George (void) zap_lookup(spa->spa_meta_objset, 1791 9816 George spa->spa_pool_props_object, 1792 9816 George zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1793 9816 George sizeof (uint64_t), 1, &spa->spa_autoexpand); 1794 10922 Jeff (void) zap_lookup(spa->spa_meta_objset, 1795 10922 Jeff spa->spa_pool_props_object, 1796 10922 Jeff zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO), 1797 10922 Jeff sizeof (uint64_t), 1, &spa->spa_dedup_ditto); 1798 3912 lling } 1799 4451 eschrock 1800 4451 eschrock /* 1801 4451 eschrock * If the 'autoreplace' property is set, then post a resource notifying 1802 4451 eschrock * the ZFS DE that it should not issue any faults for unopenable 1803 4451 eschrock * devices. We also iterate over the vdevs, and post a sysevent for any 1804 4451 eschrock * unopenable vdevs so that the normal autoreplace handler can take 1805 4451 eschrock * over. 1806 4451 eschrock */ 1807 10672 Eric if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 1808 4451 eschrock spa_check_removed(spa->spa_root_vdev); 1809 10672 Eric /* 1810 10672 Eric * For the import case, this is done in spa_import(), because 1811 10672 Eric * at this point we're using the spare definitions from 1812 10672 Eric * the MOS config, not necessarily from the userland config. 1813 10672 Eric */ 1814 10672 Eric if (state != SPA_LOAD_IMPORT) { 1815 10672 Eric spa_aux_check_removed(&spa->spa_spares); 1816 10672 Eric spa_aux_check_removed(&spa->spa_l2cache); 1817 10672 Eric } 1818 10672 Eric } 1819 3912 lling 1820 1544 eschrock /* 1821 1986 eschrock * Load the vdev state for all toplevel vdevs. 1822 1544 eschrock */ 1823 1986 eschrock vdev_load(rvd); 1824 789 ahrens 1825 789 ahrens /* 1826 789 ahrens * Propagate the leaf DTLs we just loaded all the way up the tree. 1827 789 ahrens */ 1828 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1829 789 ahrens vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1830 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 1831 789 ahrens 1832 789 ahrens /* 1833 789 ahrens * Check the state of the root vdev. If it can't be opened, it 1834 789 ahrens * indicates one or more toplevel vdevs are faulted. 1835 789 ahrens */ 1836 1544 eschrock if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1837 1544 eschrock error = ENXIO; 1838 1544 eschrock goto out; 1839 1544 eschrock } 1840 789 ahrens 1841 10922 Jeff /* 1842 10922 Jeff * Load the DDTs (dedup tables). 1843 10922 Jeff */ 1844 10922 Jeff error = ddt_load(spa); 1845 10922 Jeff if (error != 0) { 1846 10922 Jeff vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1847 10922 Jeff VDEV_AUX_CORRUPT_DATA); 1848 10922 Jeff error = EIO; 1849 10922 Jeff goto out; 1850 10922 Jeff } 1851 10922 Jeff 1852 10956 George spa_update_dspace(spa); 1853 10956 George 1854 10921 Tim if (state != SPA_LOAD_TRYIMPORT) { 1855 10921 Tim error = spa_load_verify(spa); 1856 10921 Tim if (error) { 1857 10921 Tim vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1858 10921 Tim VDEV_AUX_CORRUPT_DATA); 1859 10921 Tim goto out; 1860 10921 Tim } 1861 10921 Tim } 1862 10921 Tim 1863 10922 Jeff /* 1864 10922 Jeff * Load the intent log state and check log integrity. 1865 10922 Jeff */ 1866 10922 Jeff VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, 1867 10922 Jeff &nvroot) == 0); 1868 10922 Jeff spa_load_log_state(spa, nvroot); 1869 10922 Jeff nvlist_free(nvconfig); 1870 10922 Jeff 1871 10922 Jeff if (spa_check_logs(spa)) { 1872 10922 Jeff vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1873 10922 Jeff VDEV_AUX_BAD_LOG); 1874 10922 Jeff error = ENXIO; 1875 10922 Jeff ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1876 10922 Jeff goto out; 1877 10922 Jeff } 1878 10922 Jeff 1879 10921 Tim if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 1880 10921 Tim spa->spa_load_max_txg == UINT64_MAX)) { 1881 1635 bonwick dmu_tx_t *tx; 1882 1635 bonwick int need_update = B_FALSE; 1883 8241 Jeff 1884 8241 Jeff ASSERT(state != SPA_LOAD_TRYIMPORT); 1885 1601 bonwick 1886 1635 bonwick /* 1887 1635 bonwick * Claim log blocks that haven't been committed yet. 1888 1635 bonwick * This must all happen in a single txg. 1889 10922 Jeff * Note: spa_claim_max_txg is updated by spa_claim_notify(), 1890 10922 Jeff * invoked from zil_claim_log_block()'s i/o done callback. 1891 10921 Tim * Price of rollback is that we abandon the log. 1892 1635 bonwick */ 1893 10922 Jeff spa->spa_claiming = B_TRUE; 1894 10922 Jeff 1895 1601 bonwick tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1896 789 ahrens spa_first_txg(spa)); 1897 7754 Jeff (void) dmu_objset_find(spa_name(spa), 1898 2417 ahrens zil_claim, tx, DS_FIND_CHILDREN); 1899 789 ahrens dmu_tx_commit(tx); 1900 789 ahrens 1901 10922 Jeff spa->spa_claiming = B_FALSE; 1902 10922 Jeff 1903 9701 George spa->spa_log_state = SPA_LOG_GOOD; 1904 789 ahrens spa->spa_sync_on = B_TRUE; 1905 789 ahrens txg_sync_start(spa->spa_dsl_pool); 1906 789 ahrens 1907 789 ahrens /* 1908 10922 Jeff * Wait for all claims to sync. We sync up to the highest 1909 10922 Jeff * claimed log block birth time so that claimed log blocks 1910 10922 Jeff * don't appear to be from the future. spa_claim_max_txg 1911 10922 Jeff * will have been set for us by either zil_check_log_chain() 1912 10922 Jeff * (invoked from spa_check_logs()) or zil_claim() above. 1913 10922 Jeff */ 1914 10922 Jeff txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 1915 1585 bonwick 1916 1585 bonwick /* 1917 1635 bonwick * If the config cache is stale, or we have uninitialized 1918 1635 bonwick * metaslabs (see spa_vdev_add()), then update the config. 1919 10100 Lin * 1920 10100 Lin * If spa_load_verbatim is true, trust the current 1921 10100 Lin * in-core spa_config and update the disk labels. 1922 1585 bonwick */ 1923 1635 bonwick if (config_cache_txg != spa->spa_config_txg || 1924 10921 Tim state == SPA_LOAD_IMPORT || spa->spa_load_verbatim || 1925 10921 Tim state == SPA_LOAD_RECOVER) 1926 1635 bonwick need_update = B_TRUE; 1927 1635 bonwick 1928 8241 Jeff for (int c = 0; c < rvd->vdev_children; c++) 1929 1635 bonwick if (rvd->vdev_child[c]->vdev_ms_array == 0) 1930 1635 bonwick need_update = B_TRUE; 1931 1585 bonwick 1932 1585 bonwick /* 1933 1635 bonwick * Update the config cache asychronously in case we're the 1934 1635 bonwick * root pool, in which case the config cache isn't writable yet. 1935 1585 bonwick */ 1936 1635 bonwick if (need_update) 1937 1635 bonwick spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 1938 8241 Jeff 1939 8241 Jeff /* 1940 8241 Jeff * Check all DTLs to see if anything needs resilvering. 1941 8241 Jeff */ 1942 8241 Jeff if (vdev_resilver_needed(rvd, NULL, NULL)) 1943 8241 Jeff spa_async_request(spa, SPA_ASYNC_RESILVER); 1944 10298 Matthew 1945 10298 Matthew /* 1946 10298 Matthew * Delete any inconsistent datasets. 1947 10298 Matthew */ 1948 10298 Matthew (void) dmu_objset_find(spa_name(spa), 1949 10298 Matthew dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 1950 10342 chris 1951 10342 chris /* 1952 10342 chris * Clean up any stale temporary dataset userrefs. 1953 10342 chris */ 1954 10342 chris dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 1955 789 ahrens } 1956 789 ahrens 1957 1544 eschrock error = 0; 1958 1544 eschrock out: 1959 10921 Tim 1960 7046 ahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 1961 2082 eschrock if (error && error != EBADF) 1962 7294 perrin zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1963 11149 George 1964 11149 George spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; 1965 1544 eschrock spa->spa_ena = 0; 1966 1544 eschrock 1967 1544 eschrock return (error); 1968 789 ahrens } 1969 789 ahrens 1970 10921 Tim static int 1971 10921 Tim spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 1972 10921 Tim { 1973 10921 Tim spa_unload(spa); 1974 10921 Tim spa_deactivate(spa); 1975 10921 Tim 1976 10921 Tim spa->spa_load_max_txg--; 1977 10921 Tim 1978 10921 Tim spa_activate(spa, spa_mode_global); 1979 10921 Tim spa_async_suspend(spa); 1980 10921 Tim 1981 10921 Tim return (spa_load(spa, state, mosconfig)); 1982 10921 Tim } 1983 10921 Tim 1984 10921 Tim static int 1985 10921 Tim spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 1986 10921 Tim uint64_t max_request, boolean_t extreme) 1987 10921 Tim { 1988 10921 Tim nvlist_t *config = NULL; 1989 10921 Tim int load_error, rewind_error; 1990 10921 Tim uint64_t safe_rollback_txg; 1991 10921 Tim uint64_t min_txg; 1992 10921 Tim 1993 11026 Tim if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 1994 10921 Tim spa->spa_load_max_txg = spa->spa_load_txg; 1995 11026 Tim spa->spa_log_state = SPA_LOG_CLEAR; 1996 11026 Tim } else { 1997 10921 Tim spa->spa_load_max_txg = max_request; 1998 11026 Tim } 1999 10921 Tim 2000 10921 Tim load_error = rewind_error = spa_load(spa, state, mosconfig); 2001 10921 Tim if (load_error == 0) 2002 10921 Tim return (0); 2003 10921 Tim 2004 10921 Tim if (spa->spa_root_vdev != NULL) 2005 10921 Tim config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2006 10921 Tim 2007 10921 Tim spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 2008 10921 Tim spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 2009 10921 Tim 2010 10921 Tim /* specific txg requested */ 2011 10921 Tim if (spa->spa_load_max_txg != UINT64_MAX && !extreme) { 2012 10921 Tim nvlist_free(config); 2013 10921 Tim return (load_error); 2014 10921 Tim } 2015 10921 Tim 2016 10921 Tim /* Price of rolling back is discarding txgs, including log */ 2017 10921 Tim if (state == SPA_LOAD_RECOVER) 2018 10921 Tim spa->spa_log_state = SPA_LOG_CLEAR; 2019 10921 Tim 2020 10921 Tim spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 2021 10921 Tim safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE; 2022 10921 Tim 2023 10921 Tim min_txg = extreme ? TXG_INITIAL : safe_rollback_txg; 2024 10921 Tim while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) { 2025 10921 Tim if (spa->spa_load_max_txg < safe_rollback_txg) 2026 10921 Tim spa->spa_extreme_rewind = B_TRUE; 2027 10921 Tim rewind_error = spa_load_retry(spa, state, mosconfig); 2028 10921 Tim } 2029 10921 Tim 2030 10921 Tim if (config) 2031 10921 Tim spa_rewind_data_to_nvlist(spa, config); 2032 10921 Tim 2033 10921 Tim spa->spa_extreme_rewind = B_FALSE; 2034 10921 Tim spa->spa_load_max_txg = UINT64_MAX; 2035 10921 Tim 2036 10921 Tim if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 2037 10921 Tim spa_config_set(spa, config); 2038 10921 Tim 2039 10921 Tim return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); 2040 10921 Tim } 2041 10921 Tim 2042 789 ahrens /* 2043 789 ahrens * Pool Open/Import 2044 789 ahrens * 2045 789 ahrens * The import case is identical to an open except that the configuration is sent 2046 789 ahrens * down from userland, instead of grabbed from the configuration cache. For the 2047 789 ahrens * case of an open, the pool configuration will exist in the 2048 4451 eschrock * POOL_STATE_UNINITIALIZED state. 2049 789 ahrens * 2050 789 ahrens * The stats information (gen/count/ustats) is used to gather vdev statistics at 2051 789 ahrens * the same time open the pool, without having to keep around the spa_t in some 2052 789 ahrens * ambiguous state. 2053 789 ahrens */ 2054 789 ahrens static int 2055 10921 Tim spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 2056 10921 Tim nvlist_t **config) 2057 10921 Tim { 2058 10921 Tim spa_t *spa; 2059 10921 Tim boolean_t norewind; 2060 10921 Tim boolean_t extreme; 2061 10921 Tim zpool_rewind_policy_t policy; 2062 10921 Tim spa_load_state_t state = SPA_LOAD_OPEN; 2063 789 ahrens int error; 2064 789 ahrens int locked = B_FALSE; 2065 789 ahrens 2066 789 ahrens *spapp = NULL; 2067 10921 Tim 2068 10921 Tim zpool_get_rewind_policy(nvpolicy, &policy); 2069 10921 Tim if (policy.zrp_request & ZPOOL_DO_REWIND) 2070 10921 Tim state = SPA_LOAD_RECOVER; 2071 10921 Tim norewind = (policy.zrp_request == ZPOOL_NO_REWIND); 2072 10921 Tim extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0); 2073 789 ahrens 2074 789 ahrens /* 2075 789 ahrens * As disgusting as this is, we need to support recursive calls to this 2076 789 ahrens * function because dsl_dir_open() is called during spa_load(), and ends 2077 789 ahrens * up calling spa_open() again. The real fix is to figure out how to 2078 789 ahrens * avoid dsl_dir_open() calling this in the first place. 2079 789 ahrens */ 2080 789 ahrens if (mutex_owner(&spa_namespace_lock) != curthread) { 2081 789 ahrens mutex_enter(&spa_namespace_lock); 2082 789 ahrens locked = B_TRUE; 2083 789 ahrens } 2084 789 ahrens 2085 789 ahrens if ((spa = spa_lookup(pool)) == NULL) { 2086 789 ahrens if (locked) 2087 789 ahrens mutex_exit(&spa_namespace_lock); 2088 789 ahrens return (ENOENT); 2089 789 ahrens } 2090 10921 Tim 2091 789 ahrens if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 2092 789 ahrens 2093 8241 Jeff spa_activate(spa, spa_mode_global); 2094 789 ahrens 2095 10921 Tim if (spa->spa_last_open_failed && norewind) { 2096 10921 Tim if (config != NULL && spa->spa_config) 2097 10921 Tim VERIFY(nvlist_dup(spa->spa_config, 2098 10921 Tim config, KM_SLEEP) == 0); 2099 10921 Tim spa_deactivate(spa); 2100 10921 Tim if (locked) 2101 10921 Tim mutex_exit(&spa_namespace_lock); 2102 10921 Tim return (spa->spa_last_open_failed); 2103 10921 Tim } 2104 10921 Tim 2105 10921 Tim if (state != SPA_LOAD_RECOVER) 2106 10921 Tim spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 2107 10921 Tim 2108 10921 Tim error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 2109 10921 Tim extreme); 2110 789 ahrens 2111 789 ahrens if (error == EBADF) { 2112 789 ahrens /* 2113 1986 eschrock * If vdev_validate() returns failure (indicated by 2114 1986 eschrock * EBADF), it indicates that one of the vdevs indicates 2115 1986 eschrock * that the pool has been exported or destroyed. If 2116 1986 eschrock * this is the case, the config cache is out of sync and 2117 1986 eschrock * we should remove the pool from the namespace. 2118 789 ahrens */ 2119 6643 eschrock spa_unload(spa); 2120 6643 eschrock spa_deactivate(spa); 2121 6643 eschrock spa_config_sync(spa, B_TRUE, B_TRUE); 2122 789 ahrens spa_remove(spa); 2123 789 ahrens if (locked) 2124 789 ahrens mutex_exit(&spa_namespace_lock); 2125 789 ahrens return (ENOENT); 2126 1544 eschrock } 2127 1544 eschrock 2128 1544 eschrock if (error) { 2129 789 ahrens /* 2130 789 ahrens * We can't open the pool, but we still have useful 2131 789 ahrens * information: the state of each vdev after the 2132 789 ahrens * attempted vdev_open(). Return this to the user. 2133 789 ahrens */ 2134 10921 Tim if (config != NULL && spa->spa_config) 2135 10921 Tim VERIFY(nvlist_dup(spa->spa_config, config, 2136 10921 Tim KM_SLEEP) == 0); 2137 789 ahrens spa_unload(spa); 2138 789 ahrens spa_deactivate(spa); 2139 10921 Tim spa->spa_last_open_failed = error; 2140 789 ahrens if (locked) 2141 789 ahrens mutex_exit(&spa_namespace_lock); 2142 789 ahrens *spapp = NULL; 2143 789 ahrens return (error); 2144 10921 Tim } 2145 10921 Tim 2146 789 ahrens } 2147 789 ahrens 2148 789 ahrens spa_open_ref(spa, tag); 2149 4451 eschrock 2150 789 ahrens 2151 7754 Jeff if (config != NULL) 2152 789 ahrens *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2153 789 ahrens 2154 11026 Tim if (locked) { 2155 11026 Tim spa->spa_last_open_failed = 0; 2156 11026 Tim spa->spa_last_ubsync_txg = 0; 2157 11026 Tim spa->spa_load_txg = 0; 2158 11026 Tim mutex_exit(&spa_namespace_lock); 2159 11026 Tim } 2160 10921 Tim 2161 10921 Tim *spapp = spa; 2162 10921 Tim 2163 10921 Tim return (0); 2164 10921 Tim } 2165 10921 Tim 2166 10921 Tim int 2167 10921 Tim spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 2168 10921 Tim nvlist_t **config) 2169 10921 Tim { 2170 10921 Tim return (spa_open_common(name, spapp, tag, policy, config)); 2171 789 ahrens } 2172 789 ahrens 2173 789 ahrens int 2174 789 ahrens spa_open(const char *name, spa_t **spapp, void *tag) 2175 789 ahrens { 2176 10921 Tim return (spa_open_common(name, spapp, tag, NULL, NULL)); 2177 789 ahrens } 2178 789 ahrens 2179 1544 eschrock /* 2180 1544 eschrock * Lookup the given spa_t, incrementing the inject count in the process, 2181 1544 eschrock * preventing it from being exported or destroyed. 2182 1544 eschrock */ 2183 1544 eschrock spa_t * 2184 1544 eschrock spa_inject_addref(char *name) 2185 1544 eschrock { 2186 1544 eschrock spa_t *spa; 2187 1544 eschrock 2188 1544 eschrock mutex_enter(&spa_namespace_lock); 2189 1544 eschrock if ((spa = spa_lookup(name)) == NULL) { 2190 1544 eschrock mutex_exit(&spa_namespace_lock); 2191 1544 eschrock return (NULL); 2192 1544 eschrock } 2193 1544 eschrock spa->spa_inject_ref++; 2194 1544 eschrock mutex_exit(&spa_namespace_lock); 2195 1544 eschrock 2196 1544 eschrock return (spa); 2197 1544 eschrock } 2198 1544 eschrock 2199 1544 eschrock void 2200 1544 eschrock spa_inject_delref(spa_t *spa) 2201 1544 eschrock { 2202 1544 eschrock mutex_enter(&spa_namespace_lock); 2203 1544 eschrock spa->spa_inject_ref--; 2204 1544 eschrock mutex_exit(&spa_namespace_lock); 2205 1544 eschrock } 2206 1544 eschrock 2207 5450 brendan /* 2208 5450 brendan * Add spares device information to the nvlist. 2209 5450 brendan */ 2210 2082 eschrock static void 2211 2082 eschrock spa_add_spares(spa_t *spa, nvlist_t *config) 2212 2082 eschrock { 2213 2082 eschrock nvlist_t **spares; 2214 2082 eschrock uint_t i, nspares; 2215 2082 eschrock nvlist_t *nvroot; 2216 2082 eschrock uint64_t guid; 2217 2082 eschrock vdev_stat_t *vs; 2218 2082 eschrock uint_t vsc; 2219 3377 eschrock uint64_t pool; 2220 2082 eschrock 2221 9425 Eric ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 2222 9425 Eric 2223 5450 brendan if (spa->spa_spares.sav_count == 0) 2224 2082 eschrock return; 2225 2082 eschrock 2226 2082 eschrock VERIFY(nvlist_lookup_nvlist(config, 2227 2082 eschrock ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2228 5450 brendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 2229 2082 eschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 2230 2082 eschrock if (nspares != 0) { 2231 2082 eschrock VERIFY(nvlist_add_nvlist_array(nvroot, 2232 2082 eschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2233 2082 eschrock VERIFY(nvlist_lookup_nvlist_array(nvroot, 2234 2082 eschrock ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 2235 2082 eschrock 2236 2082 eschrock /* 2237 2082 eschrock * Go through and find any spares which have since been 2238 2082 eschrock * repurposed as an active spare. If this is the case, update 2239 2082 eschrock * their status appropriately. 2240 2082 eschrock */ 2241 2082 eschrock for (i = 0; i < nspares; i++) { 2242 2082 eschrock VERIFY(nvlist_lookup_uint64(spares[i], 2243 2082 eschrock ZPOOL_CONFIG_GUID, &guid) == 0); 2244 7214 lling if (spa_spare_exists(guid, &pool, NULL) && 2245 7214 lling pool != 0ULL) { 2246 2082 eschrock VERIFY(nvlist_lookup_uint64_array( 2247 2082 eschrock spares[i], ZPOOL_CONFIG_STATS, 2248 2082 eschrock (uint64_t **)&vs, &vsc) == 0); 2249 2082 eschrock vs->vs_state = VDEV_STATE_CANT_OPEN; 2250 2082 eschrock vs->vs_aux = VDEV_AUX_SPARED; 2251 2082 eschrock } 2252 2082 eschrock } 2253 2082 eschrock } 2254 2082 eschrock } 2255 2082 eschrock 2256 5450 brendan /* 2257 5450 brendan * Add l2cache device information to the nvlist, including vdev stats. 2258 5450 brendan */ 2259 5450 brendan static void 2260 5450 brendan spa_add_l2cache(spa_t *spa, nvlist_t *config) 2261 5450 brendan { 2262 5450 brendan nvlist_t **l2cache; 2263 5450 brendan uint_t i, j, nl2cache; 2264 5450 brendan nvlist_t *nvroot; 2265 5450 brendan uint64_t guid; 2266 5450 brendan vdev_t *vd; 2267 5450 brendan vdev_stat_t *vs; 2268 5450 brendan uint_t vsc; 2269 5450 brendan 2270 9425 Eric ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 2271 9425 Eric 2272 5450 brendan if (spa->spa_l2cache.sav_count == 0) 2273 5450 brendan return; 2274 5450 brendan 2275 5450 brendan VERIFY(nvlist_lookup_nvlist(config, 2276 5450 brendan ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2277 5450 brendan VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 2278 5450 brendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 2279 5450 brendan if (nl2cache != 0) { 2280 5450 brendan VERIFY(nvlist_add_nvlist_array(nvroot, 2281 5450 brendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2282 5450 brendan VERIFY(nvlist_lookup_nvlist_array(nvroot, 2283 5450 brendan ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 2284 5450 brendan 2285 5450 brendan /* 2286 5450 brendan * Update level 2 cache device stats. 2287 5450 brendan */ 2288 5450 brendan 2289 5450 brendan for (i = 0; i < nl2cache; i++) { 2290 5450 brendan VERIFY(nvlist_lookup_uint64(l2cache[i], 2291 5450 brendan ZPOOL_CONFIG_GUID, &guid) == 0); 2292 5450 brendan 2293 5450 brendan vd = NULL; 2294 5450 brendan for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 2295 5450 brendan if (guid == 2296 5450 brendan spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 2297 5450 brendan vd = spa->spa_l2cache.sav_vdevs[j]; 2298 5450 brendan break; 2299 5450 brendan } 2300 5450 brendan } 2301 5450 brendan ASSERT(vd != NULL); 2302 5450 brendan 2303 5450 brendan VERIFY(nvlist_lookup_uint64_array(l2cache[i], 2304 5450 brendan ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 2305 5450 brendan vdev_get_stats(vd, vs); 2306 5450 brendan } 2307 5450 brendan } 2308 5450 brendan } 2309 5450 brendan 2310 789 ahrens int 2311 1544 eschrock spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 2312 789 ahrens { 2313 789 ahrens int error; 2314 789 ahrens spa_t *spa; 2315 789 ahrens 2316 789 ahrens *config = NULL; 2317 10921 Tim error = spa_open_common(name, &spa, FTAG, NULL, config); 2318 1544 eschrock 2319 9425 Eric if (spa != NULL) { 2320 9425 Eric /* 2321 9425 Eric * This still leaves a window of inconsistency where the spares 2322 9425 Eric * or l2cache devices could change and the config would be 2323 9425 Eric * self-inconsistent. 2324 9425 Eric */ 2325 9425 Eric spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 2326 9425 Eric 2327 9425 Eric if (*config != NULL) { 2328 7754 Jeff VERIFY(nvlist_add_uint64(*config, 2329 9425 Eric ZPOOL_CONFIG_ERRCOUNT, 2330 9425 Eric spa_get_errlog_size(spa)) == 0); 2331 9425 Eric 2332 9425 Eric if (spa_suspended(spa)) 2333 9425 Eric VERIFY(nvlist_add_uint64(*config, 2334 9425 Eric ZPOOL_CONFIG_SUSPENDED, 2335 9425 Eric spa->spa_failmode) == 0); 2336 9425 Eric 2337 9425 Eric spa_add_spares(spa, *config); 2338 9425 Eric spa_add_l2cache(spa, *config); 2339 9425 Eric } 2340 2082 eschrock } 2341 1544 eschrock 2342 1544 eschrock /* 2343 1544 eschrock * We want to get the alternate root even for faulted pools, so we cheat 2344 1544 eschrock * and call spa_lookup() directly. 2345 1544 eschrock */ 2346 1544 eschrock if (altroot) { 2347 1544 eschrock if (spa == NULL) { 2348 1544 eschrock mutex_enter(&spa_namespace_lock); 2349 1544 eschrock spa = spa_lookup(name); 2350 1544 eschrock if (spa) 2351 1544 eschrock spa_altroot(spa, altroot, buflen); 2352 1544 eschrock else 2353 1544 eschrock altroot[0] = '\0'; 2354 1544 eschrock spa = NULL; 2355 1544 eschrock mutex_exit(&spa_namespace_lock); 2356 1544 eschrock } else { 2357 1544 eschrock spa_altroot(spa, altroot, buflen); 2358 1544 eschrock } 2359 1544 eschrock } 2360 789 ahrens 2361 9425 Eric if (spa != NULL) { 2362 9425 Eric spa_config_exit(spa, SCL_CONFIG, FTAG); 2363 789 ahrens spa_close(spa, FTAG); 2364 9425 Eric } 2365 789 ahrens 2366 789 ahrens return (error); 2367 789 ahrens } 2368 789 ahrens 2369 789 ahrens /* 2370 5450 brendan * Validate that the auxiliary device array is well formed. We must have an 2371 5450 brendan * array of nvlists, each which describes a valid leaf vdev. If this is an 2372 5450 brendan * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 2373 5450 brendan * specified, as long as they are well-formed. 2374 5450 brendan */ 2375 5450 brendan static int 2376 5450 brendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 2377 5450 brendan spa_aux_vdev_t *sav, const char *config, uint64_t version, 2378 5450 brendan vdev_labeltype_t label) 2379 5450 brendan { 2380 5450 brendan nvlist_t **dev; 2381 5450 brendan uint_t i, ndev; 2382 5450 brendan vdev_t *vd; 2383 5450 brendan int error; 2384 5450 brendan 2385 7754 Jeff ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2386 7754 Jeff 2387 5450 brendan /* 2388 5450 brendan * It's acceptable to have no devs specified. 2389 5450 brendan */ 2390 5450 brendan if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 2391 5450 brendan return (0); 2392 5450 brendan 2393 5450 brendan if (ndev == 0) 2394 2082 eschrock return (EINVAL); 2395 2082 eschrock 2396 2082 eschrock /* 2397 5450 brendan * Make sure the pool is formatted with a version that supports this 2398 5450 brendan * device type. 2399 5450 brendan */ 2400 5450 brendan if (spa_version(spa) < version) 2401 2082 eschrock return (ENOTSUP); 2402 2082 eschrock 2403 3377 eschrock /* 2404 5450 brendan * Set the pending device list so we correctly handle device in-use 2405 3377 eschrock * checking. 2406 3377 eschrock */ 2407 5450 brendan sav->sav_pending = dev; 2408 5450 brendan sav->sav_npending = ndev; 2409 5450 brendan 2410 5450 brendan for (i = 0; i < ndev; i++) { 2411 5450 brendan if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 2412 2082 eschrock mode)) != 0) 2413 3377 eschrock goto out; 2414 2082 eschrock 2415 2082 eschrock if (!vd->vdev_ops->vdev_op_leaf) { 2416 2082 eschrock vdev_free(vd); 2417 3377 eschrock error = EINVAL; 2418 3377 eschrock goto out; 2419 2082 eschrock } 2420 2082 eschrock 2421 5450 brendan /* 2422 7754 Jeff * The L2ARC currently only supports disk devices in 2423 7754 Jeff * kernel context. For user-level testing, we allow it. 2424 7754 Jeff */ 2425 7754 Jeff #ifdef _KERNEL 2426 5450 brendan if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 2427 5450 brendan strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 2428 5450 brendan error = ENOTBLK; 2429 5450 brendan goto out; 2430 5450 brendan } 2431 7754 Jeff #endif 2432 2082 eschrock vd->vdev_top = vd; 2433 3377 eschrock 2434 3377 eschrock if ((error = vdev_open(vd)) == 0 && 2435 5450 brendan (error = vdev_label_init(vd, crtxg, label)) == 0) { 2436 5450 brendan VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 2437 3377 eschrock vd->vdev_guid) == 0); 2438 2082 eschrock } 2439 2082 eschrock 2440 3377 eschrock vdev_free(vd); 2441 2082 eschrock 2442 5450 brendan if (error && 2443 5450 brendan (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 2444 3377 eschrock goto out; 2445 3377 eschrock else 2446 3377 eschrock error = 0; 2447 2082 eschrock } 2448 2082 eschrock 2449 3377 eschrock out: 2450 5450 brendan sav->sav_pending = NULL; 2451 5450 brendan sav->sav_npending = 0; 2452 5450 brendan return (error); 2453 5450 brendan } 2454 5450 brendan 2455 5450 brendan static int 2456 5450 brendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 2457 5450 brendan { 2458 5450 brendan int error; 2459 7754 Jeff 2460 7754 Jeff ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 2461 5450 brendan 2462 5450 brendan if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2463 5450 brendan &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 2464 5450 brendan VDEV_LABEL_SPARE)) != 0) { 2465 5450 brendan return (error); 2466 5450 brendan } 2467 5450 brendan 2468 5450 brendan return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 2469 5450 brendan &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 2470 5450 brendan VDEV_LABEL_L2CACHE)); 2471 5450 brendan } 2472 5450 brendan 2473 5450 brendan static void 2474 5450 brendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 2475 5450 brendan const char *config) 2476 5450 brendan { 2477 5450 brendan int i; 2478 5450 brendan 2479 5450 brendan if (sav->sav_config != NULL) { 2480 5450 brendan nvlist_t **olddevs; 2481 5450 brendan uint_t oldndevs; 2482 5450 brendan nvlist_t **newdevs; 2483 5450 brendan 2484 5450 brendan /* 2485 5450 brendan * Generate new dev list by concatentating with the 2486 5450 brendan * current dev list. 2487 5450 brendan */ 2488 5450 brendan VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 2489 5450 brendan &olddevs, &oldndevs) == 0); 2490 5450 brendan 2491 5450 brendan newdevs = kmem_alloc(sizeof (void *) * 2492 5450 brendan (ndevs + oldndevs), KM_SLEEP); 2493 5450 brendan for (i = 0; i < oldndevs; i++) 2494 5450 brendan VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 2495 5450 brendan KM_SLEEP) == 0); 2496 5450 brendan for (i = 0; i < ndevs; i++) 2497 5450 brendan VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 2498 5450 brendan KM_SLEEP) == 0); 2499 5450 brendan 2500 5450 brendan VERIFY(nvlist_remove(sav->sav_config, config, 2501 5450 brendan DATA_TYPE_NVLIST_ARRAY) == 0); 2502 5450 brendan 2503 5450 brendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, 2504 5450 brendan config, newdevs, ndevs + oldndevs) == 0); 2505 5450 brendan for (i = 0; i < oldndevs + ndevs; i++) 2506 5450 brendan nvlist_free(newdevs[i]); 2507 5450 brendan kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 2508 5450 brendan } else { 2509 5450 brendan /* 2510 5450 brendan * Generate a new dev list. 2511 5450 brendan */ 2512 5450 brendan VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 2513 5450 brendan KM_SLEEP) == 0); 2514 5450 brendan VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 2515 5450 brendan devs, ndevs) == 0); 2516 5450 brendan } 2517 5450 brendan } 2518 5450 brendan 2519 5450 brendan /* 2520 5450 brendan * Stop and drop level 2 ARC devices 2521 5450 brendan */ 2522 5450 brendan void 2523 5450 brendan spa_l2cache_drop(spa_t *spa) 2524 5450 brendan { 2525 5450 brendan vdev_t *vd; 2526 5450 brendan int i; 2527 5450 brendan spa_aux_vdev_t *sav = &spa->spa_l2cache; 2528 5450 brendan 2529 5450 brendan for (i = 0; i < sav->sav_count; i++) { 2530 5450 brendan uint64_t pool; 2531 5450 brendan 2532 5450 brendan vd = sav->sav_vdevs[i]; 2533 5450 brendan ASSERT(vd != NULL); 2534 5450 brendan 2535 8241 Jeff if (spa_l2cache_exists(vd->vdev_guid, &pool) && 2536 8241 Jeff pool != 0ULL && l2arc_vdev_present(vd)) 2537 5450 brendan l2arc_remove_vdev(vd); 2538 5450 brendan if (vd->vdev_isl2cache) 2539 5450 brendan spa_l2cache_remove(vd); 2540 5450 brendan vdev_clear_stats(vd); 2541 5450 brendan (void) vdev_close(vd); 2542 5450 brendan } 2543 2082 eschrock } 2544 2082 eschrock 2545 2082 eschrock /* 2546 789 ahrens * Pool Creation 2547 789 ahrens */ 2548 789 ahrens int 2549 5094 lling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 2550 7184 timh const char *history_str, nvlist_t *zplprops) 2551 789 ahrens { 2552 789 ahrens spa_t *spa; 2553 5094 lling char *altroot = NULL; 2554 1635 bonwick vdev_t *rvd; 2555 789 ahrens dsl_pool_t *dp; 2556 789 ahrens dmu_tx_t *tx; 2557 9816 George int error = 0; 2558 789 ahrens uint64_t txg = TXG_INITIAL; 2559 5450 brendan nvlist_t **spares, **l2cache; 2560 5450 brendan uint_t nspares, nl2cache; 2561 5094 lling uint64_t version; 2562 789 ahrens 2563 789 ahrens /* 2564 789 ahrens * If this pool already exists, return failure. 2565 789 ahrens */ 2566 789 ahrens mutex_enter(&spa_namespace_lock); 2567 789 ahrens if (spa_lookup(pool) != NULL) { 2568 789 ahrens mutex_exit(&spa_namespace_lock); 2569 789 ahrens return (EEXIST); 2570 789 ahrens } 2571 789 ahrens 2572 789 ahrens /* 2573 789 ahrens * Allocate a new spa_t structure. 2574 789 ahrens */ 2575 5094 lling (void) nvlist_lookup_string(props, 2576 5094 lling zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2577 10921 Tim spa = spa_add(pool, NULL, altroot); 2578 8241 Jeff spa_activate(spa, spa_mode_global); 2579 1601 bonwick 2580 5094 lling if (props && (error = spa_prop_validate(spa, props))) { 2581 5094 lling spa_deactivate(spa); 2582 5094 lling spa_remove(spa); 2583 6643 eschrock mutex_exit(&spa_namespace_lock); 2584 5094 lling return (error); 2585 5094 lling } 2586 5094 lling 2587 5094 lling if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 2588 5094 lling &version) != 0) 2589 5094 lling version = SPA_VERSION; 2590 5094 lling ASSERT(version <= SPA_VERSION); 2591 10922 Jeff 2592 10922 Jeff spa->spa_first_txg = txg; 2593 10922 Jeff spa->spa_uberblock.ub_txg = txg - 1; 2594 5094 lling spa->spa_uberblock.ub_version = version; 2595 789 ahrens spa->spa_ubsync = spa->spa_uberblock; 2596 9234 George 2597 9234 George /* 2598 9234 George * Create "The Godfather" zio to hold all async IOs 2599 9234 George */ 2600 9630 Jeff spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 2601 9630 Jeff ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 2602 789 ahrens 2603 1635 bonwick /* 2604 1635 bonwick * Create the root vdev. 2605 1635 bonwick */ 2606 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2607 1635 bonwick 2608 2082 eschrock error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 2609 1635 bonwick 2610 2082 eschrock ASSERT(error != 0 || rvd != NULL); 2611 2082 eschrock ASSERT(error != 0 || spa->spa_root_vdev == rvd); 2612 1635 bonwick 2613 5913 perrin if (error == 0 && !zfs_allocatable_devs(nvroot)) 2614 1635 bonwick error = EINVAL; 2615 2082 eschrock 2616 2082 eschrock if (error == 0 && 2617 2082 eschrock (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 2618 5450 brendan (error = spa_validate_aux(spa, nvroot, txg, 2619 2082 eschrock VDEV_ALLOC_ADD)) == 0) { 2620 9816 George for (int c = 0; c < rvd->vdev_children; c++) { 2621 9816 George vdev_metaslab_set_size(rvd->vdev_child[c]); 2622 9816 George vdev_expand(rvd->vdev_child[c], txg); 2623 9816 George } 2624 1635 bonwick } 2625 1635 bonwick 2626 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 2627 789 ahrens 2628 2082 eschrock if (error != 0) { 2629 789 ahrens spa_unload(spa); 2630 789 ahrens spa_deactivate(spa); 2631 789 ahrens spa_remove(spa); 2632 789 ahrens mutex_exit(&spa_namespace_lock); 2633 789 ahrens return (error); 2634 2082 eschrock } 2635 2082 eschrock 2636 2082 eschrock /* 2637 2082 eschrock * Get the list of spares, if specified. 2638 2082 eschrock */ 2639 2082 eschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 2640 2082 eschrock &spares, &nspares) == 0) { 2641 5450 brendan VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 2642 2082 eschrock KM_SLEEP) == 0); 2643 5450 brendan VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 2644 2082 eschrock ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2645 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2646 7754 Jeff spa_load_spares(spa); 2647 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 2648 5450 brendan spa->spa_spares.sav_sync = B_TRUE; 2649 5450 brendan } 2650 5450 brendan 2651 5450 brendan /* 2652 5450 brendan * Get the list of level 2 cache devices, if specified. 2653 5450 brendan */ 2654 5450 brendan if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2655 5450 brendan &l2cache, &nl2cache) == 0) { 2656 5450 brendan VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2657 5450 brendan NV_UNIQUE_NAME, KM_SLEEP) == 0); 2658 5450 brendan VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2659 5450 brendan ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2660 7754 Jeff spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2661 7754 Jeff spa_load_l2cache(spa); 2662 7754 Jeff spa_config_exit(spa, SCL_ALL, FTAG); 2663 5450 brendan spa->spa_l2cache.sav_sync = B_TRUE; 2664 789 ahrens } 2665 789 ahrens 2666 7184 timh spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2667 789 ahrens spa->spa_meta_objset = dp->dp_meta_objset; 2668 789 ahrens 2669 10956 George /* 2670 10956 George * Create DDTs (dedup tables). 2671 10956 George */ 2672 10956 George ddt_create(spa); 2673 10956 George 2674 10956 George spa_update_dspace(spa); 2675 10956 George 2676 789 ahrens tx = dmu_tx_create_assigned(dp, txg); 2677 789 ahrens 2678 789 ahrens /* 2679 789 ahrens * Create the pool config object. 2680 789 ahrens */ 2681 789 ahrens spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2682 7497 Tim DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2683 789 ahrens DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2684 789 ahrens 2685 1544 eschrock if (zap_add(spa->spa_meta_objset, 2686 789 ahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2687 1544 eschrock sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2688 1544 eschrock cmn_err(CE_PANIC, "failed to add pool config"); 2689 2082 eschrock } 2690 2082 eschrock 2691 5094 lling /* Newly created pools with the right version are always deflated. */ 2692 5094 lling if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2693 5094 lling spa->spa_deflate = TRUE; 2694 5094 lling if (zap_add(spa->spa_meta_objset, 2695 5094 lling DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2696 5094 lling sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2697 5094 lling cmn_err(CE_PANIC, "failed to add deflate"); 2698 5094 lling } 2699 1544 eschrock } 2700 789 ahrens 2701 789 ahrens /* 2702 789 ahrens * Create the deferred-free bplist object. Turn off compression 2703 789 ahrens * because sync-to-convergence takes longer if the blocksize 2704 789 ahrens * keeps changing. 2705 789 ahrens */ 2706 10922 Jeff spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset, 2707 789 ahrens 1 << 14, tx); 2708 10922 Jeff dmu_object_set_compress(spa->spa_meta_objset, 2709 10922 Jeff spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx); 2710 789 ahrens 2711 1544 eschrock if (zap_add(spa->spa_meta_objset, 2712 789 ahrens DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2713 10922 Jeff sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) { 2714 1544 eschrock cmn_err(CE_PANIC, "failed to add bplist"); 2715 1544 eschrock } 2716 2926 ek110237 2717 2926 ek110237 /* 2718 2926 ek110237 * Create the pool's history object. 2719 2926 ek110237 */ 2720 5094 lling if (version >= SPA_VERSION_ZPOOL_HISTORY) 2721 5094 lling spa_history_create_obj(spa, tx); 2722 5094 lling 2723 5094 lling /* 2724 5094 lling * Set pool properties. 2725 5094 lling */ 2726 5094 lling spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2727 5094 lling spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 2728 5329 gw25295 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2729 9816 George spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 2730 10922 Jeff 2731 8525 Eric if (props != NULL) { 2732 8525 Eric spa_configfile_set(spa, props, B_FALSE); 2733 5094 lling spa_sync_props(spa, props, CRED(), tx); 2734 8525 Eric } 2735 789 ahrens 2736 789 ahrens dmu_tx_commit(tx); 2737 789 ahrens 2738 789 ahrens spa->spa_sync_on = B_TRUE; 2739 789 ahrens txg_sync_start(spa->spa_dsl_pool); 2740 789 ahrens 2741 789 ahrens /* 2742 789 ahrens * We explicitly wait for the first transaction to complete so that our 2743 789 ahrens * bean counters are appropriately updated. 2744 789 ahrens */ 2745 789 ahrens txg_wait_synced(spa->spa_dsl_pool, txg); 2746 789 ahrens 2747 6643 eschrock spa_config_sync(spa, B_FALSE, B_TRUE); 2748 4715 ek110237 2749 5094 lling if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 2750 4715 ek110237 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 2751 9946 Mark spa_history_log_version(spa, LOG_POOL_CREATE); 2752 789 ahrens 2753 7046 ahrens spa->spa_minref = refcount_count(&spa->spa_refcount); 2754 8667 George 2755 4451 eschrock mutex_exit(&spa_namespace_lock); 2756 789 ahrens 2757 789 ahrens return (0); 2758 6423 gw25295 } 2759 6423 gw25295 2760 6423 gw25295 #ifdef _KERNEL 2761 6423 gw25295 /* 2762 9790 Lin * Get the root pool information from the root disk, then import the root pool 2763 9790 Lin * during the system boot up time. 2764 9790 Lin */ 2765 9790 Lin extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 2766 9790 Lin 2767 9790 Lin static nvlist_t * 2768 9790 Lin spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 2769 9790 Lin { 2770 9790 Lin nvlist_t *config; 2771 6423 gw25295 nvlist_t *nvtop, *nvroot; 2772 6423 gw25295 uint64_t pgid; 2773 6423 gw25295 2774 9790 Lin if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 2775 9790 Lin return (NULL); 2776 9790 Lin 2777 6423 gw25295 /* 2778 6423 gw25295 * Add this top-level vdev to the child array. 2779 6423 gw25295 */ 2780 9790 Lin VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 2781 9790 Lin &nvtop) == 0); 2782 9790 Lin VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 2783 9790 Lin &pgid) == 0); 2784 9790 Lin VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 2785 6423 gw25295 2786 6423 gw25295 /* 2787 6423 gw25295 * Put this pool's top-level vdevs into a root vdev. 2788 6423 gw25295 */ 2789 6423 gw25295 VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2790 9790 Lin VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 2791 9790 Lin VDEV_TYPE_ROOT) == 0); 2792 6423 gw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2793 6423 gw25295 VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2794 6423 gw25295 VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2795 6423 gw25295 &nvtop, 1) == 0); 2796 6423 gw25295 2797 6423 gw25295 /* 2798 6423 gw25295 * Replace the existing vdev_tree with the new root vdev in 2799 6423 gw25295 * this pool's configuration (remove the old, add the new). 2800 6423 gw25295 */ 2801 6423 gw25295 VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2802 6423 gw25295 nvlist_free(nvroot); 2803 9790 Lin return (config); 2804 9790 Lin } 2805 9790 Lin 2806 9790 Lin /* 2807 9790 Lin * Walk the vdev tree and see if we can find a device with "better" 2808 9790 Lin * configuration. A configuration is "better" if the label on that 2809 9790 Lin * device has a more recent txg. 2810 9790 Lin */ 2811 9790 Lin static void 2812 9790 Lin spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 2813 9790 Lin { 2814 9816 George for (int c = 0; c < vd->vdev_children; c++) 2815 9790 Lin spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 2816 9790 Lin 2817 9790 Lin if (vd->vdev_ops->vdev_op_leaf) { 2818 9790 Lin nvlist_t *label; 2819 9790 Lin uint64_t label_txg; 2820 9790 Lin 2821 9790 Lin if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 2822 9790 Lin &label) != 0) 2823 9790 Lin return; 2824 9790 Lin 2825 9790 Lin VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 2826 9790 Lin &label_txg) == 0); 2827 9790 Lin 2828 9790 Lin /* 2829 9790 Lin * Do we have a better boot device? 2830 9790 Lin */ 2831 9790 Lin if (label_txg > *txg) { 2832 9790 Lin *txg = label_txg; 2833 9790 Lin *avd = vd; 2834 9790 Lin } 2835 9790 Lin nvlist_free(label); 2836 9790 Lin } 2837 7147 taylor } 2838 7147 taylor 2839 6423 gw25295 /* 2840 6423 gw25295 * Import a root pool. 2841 6423 gw25295 * 2842 7147 taylor * For x86. devpath_list will consist of devid and/or physpath name of 2843 7147 taylor * the vdev (e.g. "id1,sd (at) SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2844 7147 taylor * The GRUB "findroot" command will return the vdev we should boot. 2845 6423 gw25295 * 2846 6423 gw25295 * For Sparc, devpath_list consists the physpath name of the booting device 2847 6423 gw25295 * no matter the rootpool is a single device pool or a mirrored pool. 2848 6423 gw25295 * e.g. 2849 6423 gw25295 * "/pci@1f,0/ide@d/disk@0,0:a" 2850 6423 gw25295 */ 2851 6423 gw25295 int 2852 7147 taylor spa_import_rootpool(char *devpath, char *devid) 2853 6423 gw25295 { 2854 9790 Lin spa_t *spa; 2855 9790 Lin vdev_t *rvd, *bvd, *avd = NULL; 2856 9790 Lin nvlist_t *config, *nvtop; 2857 9790 Lin uint64_t guid, txg; 2858 6423 gw25295 char *pname; 2859 6423 gw25295 int error; 2860 9790 Lin 2861 9790 Lin /* 2862 9790 Lin * Read the label from the boot device and generate a configuration. 2863 9790 Lin */ 2864 10822 Jack config = spa_generate_rootconf(devpath, devid, &guid); 2865 10822 Jack #if defined(_OBP) && defined(_KERNEL) 2866 10822 Jack if (config == NULL) { 2867 10822 Jack if (strstr(devpath, "/iscsi/ssd") != NULL) { 2868 10822 Jack /* iscsi boot */ 2869 10822 Jack get_iscsi_bootpath_phy(devpath); 2870 10822 Jack config = spa_generate_rootconf(devpath, devid, &guid); 2871 10822 Jack } 2872 10822 Jack } 2873 10822 Jack #endif 2874 10822 Jack if (config == NULL) { 2875 9790 Lin cmn_err(CE_NOTE, "Can not read the pool label from '%s'", 2876 9790 Lin devpath); 2877 9790 Lin return (EIO); 2878 9790 Lin } 2879 9790 Lin 2880 9790 Lin VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 2881 9790 Lin &pname) == 0); 2882 9790 Lin VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 2883 6423 gw25295 2884 9425 Eric mutex_enter(&spa_namespace_lock); 2885 9425 Eric if ((spa = spa_lookup(pname)) != NULL) { 2886 9425 Eric /* 2887 9425 Eric * Remove the existing root pool from the namespace so that we 2888 9425 Eric * can replace it with the correct config we just read in. 2889 9425 Eric */ 2890 9425 Eric spa_remove(spa); 2891 9425 Eric } 2892 9425 Eric 2893 10921 Tim spa = spa_add(pname, config, NULL); 2894 9425 Eric spa->spa_is_root = B_TRUE; 2895 10100 Lin spa->spa_load_verbatim = B_TRUE; 2896 9790 Lin 2897 9790 Lin /* 2898 9790 Lin * Build up a vdev tree based on the boot device's label config. 2899 9790 Lin */ 2900 9790 Lin VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 2901 9790 Lin &nvtop) == 0); 2902 9790 Lin spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2903 9790 Lin error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 2904 9790 Lin VDEV_ALLOC_ROOTPOOL); 2905 9790 Lin spa_config_exit(spa, SCL_ALL, FTAG); 2906 9790 Lin if (error) { 2907 9790 Lin mutex_exit(&spa_namespace_lock); 2908 9790 Lin nvlist_free(config); 2909 9790 Lin cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 2910 9790 Lin pname); 2911 9790 Lin return (error); 2912 9790 Lin } 2913 9790 Lin 2914 9790 Lin /* 2915 9790 Lin * Get the boot vdev. 2916 9790 Lin */ 2917 9790 Lin if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 2918 9790 Lin cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 2919 9790 Lin (u_longlong_t)guid); 2920 9790 Lin error = ENOENT; 2921 9790 Lin goto out; 2922 9790 Lin } 2923 9790 Lin 2924 9790 Lin /* 2925 9790 Lin * Determine if there is a better boot device. 2926 9790 Lin */ 2927 9790 Lin avd = bvd; 2928 9790 Lin spa_alt_rootvdev(rvd, &avd, &txg); 2929 9790 Lin if (avd != bvd) { 2930 9790 Lin cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " 2931 9790 Lin "try booting from '%s'", avd->vdev_path); 2932 9790 Lin error = EINVAL; 2933 9790 Lin goto out; 2934 9790 Lin } 2935 9790 Lin 2936 9790 Lin /* 2937 9790 Lin * If the boot device is part of a spare vdev then ensure that 2938 9790 Lin * we're booting off the active spare. 2939 9790 Lin */ 2940 9790 Lin if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 2941 9790 Lin !bvd->vdev_isspare) { 2942 9790 Lin cmn_err(CE_NOTE, "The boot device is currently spared. Please " 2943 9790 Lin "try booting from '%s'", 2944 9790 Lin bvd->vdev_parent->vdev_child[1]->vdev_path); 2945 9790 Lin error = EINVAL; 2946 9790 Lin goto out; 2947 9790 Lin } 2948 9790 Lin 2949 9790 Lin error = 0; 2950 9946 Mark spa_history_log_version(spa, LOG_POOL_IMPORT); 2951 9790 Lin out: 2952 9790 Lin spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2953 9790 Lin vdev_free(rvd); 2954 9790 Lin spa_config_exit(spa, SCL_ALL, FTAG); 2955 9790 Lin mutex_exit(&spa_namespace_lock); 2956 9790 Lin 2957 9790 Lin nvlist_free(config); 2958 9790 Lin return (error); 2959 9790 Lin } 2960 9790 Lin 2961 6423 gw25295 #endif 2962 6423 gw25295 2963 6423 gw25295 /* 2964 9425 Eric * Take a pool and insert it into the namespace as if it had been loaded at 2965 9425 Eric * boot. 2966 9425 Eric */ 2967 9425 Eric int 2968 9425 Eric spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props) 2969 9425 Eric { 2970 9425 Eric spa_t *spa; 2971 10921 Tim zpool_rewind_policy_t policy; 2972 9425 Eric char *altroot = NULL; 2973 9425 Eric 2974 9425 Eric mutex_enter(&spa_namespace_lock); 2975 9425 Eric if (spa_lookup(pool) != NULL) { 2976 9425 Eric mutex_exit(&spa_namespace_lock); 2977 9425 Eric return (EEXIST); 2978 9425 Eric } 2979 9425 Eric 2980 9425 Eric (void) nvlist_lookup_string(props, 2981 9425 Eric zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2982 10921 Tim spa = spa_add(pool, config, altroot); 2983 10921 Tim 2984 10921 Tim zpool_get_rewind_policy(config, &policy); 2985 10921 Tim spa->spa_load_max_txg = policy.zrp_txg; 2986 9425 Eric 2987 10100 Lin spa->spa_load_verbatim = B_TRUE; 2988 9425 Eric 2989 9425 Eric if (props != NULL) 2990 9425 Eric spa_configfile_set(spa, props, B_FALSE); 2991 9425 Eric 2992 9425 Eric spa_config_sync(spa, B_FALSE, B_TRUE); 2993 9425 Eric 2994 9425 Eric mutex_exit(&spa_namespace_lock); 2995 9946 Mark spa_history_log_version(spa, LOG_POOL_IMPORT); 2996 9425 Eric 2997 9425 Eric return (0); 2998 9425 Eric } 2999 9425 Eric 3000 9425 Eric /* 3001 6423 gw25295 * Import a non-root pool into the system. 3002 6423 gw25295 */ 3003 6423 gw25295 int 3004 6423 gw25295 spa_import(const char *pool, nvlist_t *config, nvlist_t *props) 3005 6423 gw25295 { 3006 9425 Eric spa_t *spa; 3007 9425 Eric char *altroot = NULL; 3008 10921 Tim spa_load_state_t state = SPA_LOAD_IMPORT; 3009 10921 Tim zpool_rewind_policy_t policy; 3010 9425 Eric int error; 3011 9425 Eric nvlist_t *nvroot; 3012 9425 Eric nvlist_t **spares, **l2cache; 3013 9425 Eric uint_t nspares, nl2cache; 3014 9425 Eric 3015 9425 Eric /* 3016 <