1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 2082 eschrock 22 789 ahrens /* 23 10105 adam * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 789 ahrens * Use is subject to license terms. 25 789 ahrens */ 26 789 ahrens 27 789 ahrens /* 28 789 ahrens * Functions to convert between a list of vdevs and an nvlist representing the 29 789 ahrens * configuration. Each entry in the list can be one of: 30 789 ahrens * 31 789 ahrens * Device vdevs 32 789 ahrens * disk=(path=..., devid=...) 33 789 ahrens * file=(path=...) 34 789 ahrens * 35 789 ahrens * Group vdevs 36 2082 eschrock * raidz[1|2]=(...) 37 789 ahrens * mirror=(...) 38 2082 eschrock * 39 2082 eschrock * Hot spares 40 789 ahrens * 41 789 ahrens * While the underlying implementation supports it, group vdevs cannot contain 42 789 ahrens * other group vdevs. All userland verification of devices is contained within 43 789 ahrens * this file. If successful, the nvlist returned can be passed directly to the 44 789 ahrens * kernel; we've done as much verification as possible in userland. 45 2082 eschrock * 46 2082 eschrock * Hot spares are a special case, and passed down as an array of disk vdevs, at 47 2082 eschrock * the same level as the root of the vdev tree. 48 789 ahrens * 49 4276 taylor * The only function exported by this file is 'make_root_vdev'. The 50 4276 taylor * function performs several passes: 51 789 ahrens * 52 789 ahrens * 1. Construct the vdev specification. Performs syntax validation and 53 789 ahrens * makes sure each device is valid. 54 789 ahrens * 2. Check for devices in use. Using libdiskmgt, makes sure that no 55 789 ahrens * devices are also in use. Some can be overridden using the 'force' 56 789 ahrens * flag, others cannot. 57 789 ahrens * 3. Check for replication errors if the 'force' flag is not specified. 58 789 ahrens * validates that the replication level is consistent across the 59 789 ahrens * entire pool. 60 4276 taylor * 4. Call libzfs to label any whole disks with an EFI label. 61 789 ahrens */ 62 789 ahrens 63 789 ahrens #include <assert.h> 64 789 ahrens #include <devid.h> 65 789 ahrens #include <errno.h> 66 789 ahrens #include <fcntl.h> 67 789 ahrens #include <libdiskmgt.h> 68 789 ahrens #include <libintl.h> 69 789 ahrens #include <libnvpair.h> 70 10105 adam #include <limits.h> 71 789 ahrens #include <stdio.h> 72 789 ahrens #include <string.h> 73 789 ahrens #include <unistd.h> 74 789 ahrens #include <sys/efi_partition.h> 75 789 ahrens #include <sys/stat.h> 76 789 ahrens #include <sys/vtoc.h> 77 789 ahrens #include <sys/mntent.h> 78 789 ahrens 79 789 ahrens #include "zpool_util.h" 80 789 ahrens 81 789 ahrens #define DISK_ROOT "/dev/dsk" 82 789 ahrens #define RDISK_ROOT "/dev/rdsk" 83 789 ahrens #define BACKUP_SLICE "s2" 84 789 ahrens 85 789 ahrens /* 86 789 ahrens * For any given vdev specification, we can have multiple errors. The 87 789 ahrens * vdev_error() function keeps track of whether we have seen an error yet, and 88 789 ahrens * prints out a header if its the first error we've seen. 89 789 ahrens */ 90 2082 eschrock boolean_t error_seen; 91 2082 eschrock boolean_t is_force; 92 789 ahrens 93 2082 eschrock /*PRINTFLIKE1*/ 94 2082 eschrock static void 95 789 ahrens vdev_error(const char *fmt, ...) 96 789 ahrens { 97 789 ahrens va_list ap; 98 789 ahrens 99 789 ahrens if (!error_seen) { 100 789 ahrens (void) fprintf(stderr, gettext("invalid vdev specification\n")); 101 789 ahrens if (!is_force) 102 789 ahrens (void) fprintf(stderr, gettext("use '-f' to override " 103 789 ahrens "the following errors:\n")); 104 789 ahrens else 105 789 ahrens (void) fprintf(stderr, gettext("the following errors " 106 789 ahrens "must be manually repaired:\n")); 107 2082 eschrock error_seen = B_TRUE; 108 789 ahrens } 109 789 ahrens 110 789 ahrens va_start(ap, fmt); 111 789 ahrens (void) vfprintf(stderr, fmt, ap); 112 789 ahrens va_end(ap); 113 789 ahrens } 114 789 ahrens 115 1352 eschrock static void 116 1352 eschrock libdiskmgt_error(int error) 117 789 ahrens { 118 1544 eschrock /* 119 2082 eschrock * ENXIO/ENODEV is a valid error message if the device doesn't live in 120 1544 eschrock * /dev/dsk. Don't bother printing an error message in this case. 121 1544 eschrock */ 122 2082 eschrock if (error == ENXIO || error == ENODEV) 123 1544 eschrock return; 124 1544 eschrock 125 1352 eschrock (void) fprintf(stderr, gettext("warning: device in use checking " 126 1352 eschrock "failed: %s\n"), strerror(error)); 127 789 ahrens } 128 789 ahrens 129 789 ahrens /* 130 1352 eschrock * Validate a device, passing the bulk of the work off to libdiskmgt. 131 789 ahrens */ 132 4276 taylor static int 133 2082 eschrock check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) 134 789 ahrens { 135 1352 eschrock char *msg; 136 1352 eschrock int error = 0; 137 4946 mmusante dm_who_type_t who; 138 789 ahrens 139 4946 mmusante if (force) 140 4946 mmusante who = DM_WHO_ZPOOL_FORCE; 141 4946 mmusante else if (isspare) 142 4946 mmusante who = DM_WHO_ZPOOL_SPARE; 143 4946 mmusante else 144 4946 mmusante who = DM_WHO_ZPOOL; 145 4946 mmusante 146 4946 mmusante if (dm_inuse((char *)path, &msg, who, &error) || error) { 147 1352 eschrock if (error != 0) { 148 1352 eschrock libdiskmgt_error(error); 149 1352 eschrock return (0); 150 3741 mmusante } else { 151 1352 eschrock vdev_error("%s", msg); 152 1352 eschrock free(msg); 153 4082 mmusante return (-1); 154 1352 eschrock } 155 789 ahrens } 156 789 ahrens 157 1352 eschrock /* 158 1352 eschrock * If we're given a whole disk, ignore overlapping slices since we're 159 1352 eschrock * about to label it anyway. 160 1352 eschrock */ 161 1352 eschrock error = 0; 162 1352 eschrock if (!wholedisk && !force && 163 1352 eschrock (dm_isoverlapping((char *)path, &msg, &error) || error)) { 164 4082 mmusante if (error == 0) { 165 4082 mmusante /* dm_isoverlapping returned -1 */ 166 4082 mmusante vdev_error(gettext("%s overlaps with %s\n"), path, msg); 167 4082 mmusante free(msg); 168 4082 mmusante return (-1); 169 4082 mmusante } else if (error != ENODEV) { 170 4082 mmusante /* libdiskmgt's devcache only handles physical drives */ 171 1352 eschrock libdiskmgt_error(error); 172 1352 eschrock return (0); 173 1352 eschrock } 174 1352 eschrock } 175 789 ahrens 176 4082 mmusante return (0); 177 789 ahrens } 178 789 ahrens 179 4276 taylor 180 789 ahrens /* 181 789 ahrens * Validate a whole disk. Iterate over all slices on the disk and make sure 182 789 ahrens * that none is in use by calling check_slice(). 183 789 ahrens */ 184 4276 taylor static int 185 2082 eschrock check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) 186 789 ahrens { 187 789 ahrens dm_descriptor_t *drive, *media, *slice; 188 789 ahrens int err = 0; 189 789 ahrens int i; 190 789 ahrens int ret; 191 789 ahrens 192 789 ahrens /* 193 789 ahrens * Get the drive associated with this disk. This should never fail, 194 789 ahrens * because we already have an alias handle open for the device. 195 789 ahrens */ 196 789 ahrens if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, 197 1352 eschrock &err)) == NULL || *drive == NULL) { 198 1352 eschrock if (err) 199 1352 eschrock libdiskmgt_error(err); 200 1352 eschrock return (0); 201 1352 eschrock } 202 789 ahrens 203 789 ahrens if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, 204 1352 eschrock &err)) == NULL) { 205 1352 eschrock dm_free_descriptors(drive); 206 1352 eschrock if (err) 207 1352 eschrock libdiskmgt_error(err); 208 1352 eschrock return (0); 209 1352 eschrock } 210 789 ahrens 211 789 ahrens dm_free_descriptors(drive); 212 789 ahrens 213 789 ahrens /* 214 789 ahrens * It is possible that the user has specified a removable media drive, 215 789 ahrens * and the media is not present. 216 789 ahrens */ 217 789 ahrens if (*media == NULL) { 218 1352 eschrock dm_free_descriptors(media); 219 789 ahrens vdev_error(gettext("'%s' has no media in drive\n"), name); 220 789 ahrens return (-1); 221 789 ahrens } 222 789 ahrens 223 789 ahrens if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, 224 1352 eschrock &err)) == NULL) { 225 1352 eschrock dm_free_descriptors(media); 226 1352 eschrock if (err) 227 1352 eschrock libdiskmgt_error(err); 228 1352 eschrock return (0); 229 1352 eschrock } 230 789 ahrens 231 789 ahrens dm_free_descriptors(media); 232 789 ahrens 233 789 ahrens ret = 0; 234 789 ahrens 235 789 ahrens /* 236 789 ahrens * Iterate over all slices and report any errors. We don't care about 237 789 ahrens * overlapping slices because we are using the whole disk. 238 789 ahrens */ 239 789 ahrens for (i = 0; slice[i] != NULL; i++) { 240 2082 eschrock char *name = dm_get_name(slice[i], &err); 241 2082 eschrock 242 2082 eschrock if (check_slice(name, force, B_TRUE, isspare) != 0) 243 789 ahrens ret = -1; 244 2082 eschrock 245 2082 eschrock dm_free_name(name); 246 789 ahrens } 247 789 ahrens 248 789 ahrens dm_free_descriptors(slice); 249 789 ahrens return (ret); 250 789 ahrens } 251 789 ahrens 252 789 ahrens /* 253 1352 eschrock * Validate a device. 254 789 ahrens */ 255 4276 taylor static int 256 2082 eschrock check_device(const char *path, boolean_t force, boolean_t isspare) 257 789 ahrens { 258 789 ahrens dm_descriptor_t desc; 259 789 ahrens int err; 260 1352 eschrock char *dev; 261 789 ahrens 262 789 ahrens /* 263 789 ahrens * For whole disks, libdiskmgt does not include the leading dev path. 264 789 ahrens */ 265 789 ahrens dev = strrchr(path, '/'); 266 789 ahrens assert(dev != NULL); 267 789 ahrens dev++; 268 1352 eschrock if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { 269 2082 eschrock err = check_disk(path, desc, force, isspare); 270 1352 eschrock dm_free_descriptor(desc); 271 1352 eschrock return (err); 272 789 ahrens } 273 789 ahrens 274 2082 eschrock return (check_slice(path, force, B_FALSE, isspare)); 275 789 ahrens } 276 789 ahrens 277 789 ahrens /* 278 789 ahrens * Check that a file is valid. All we can do in this case is check that it's 279 4082 mmusante * not in use by another pool, and not in use by swap. 280 789 ahrens */ 281 4276 taylor static int 282 2082 eschrock check_file(const char *file, boolean_t force, boolean_t isspare) 283 789 ahrens { 284 1352 eschrock char *name; 285 789 ahrens int fd; 286 789 ahrens int ret = 0; 287 4082 mmusante int err; 288 1352 eschrock pool_state_t state; 289 2082 eschrock boolean_t inuse; 290 4082 mmusante 291 4082 mmusante if (dm_inuse_swap(file, &err)) { 292 4082 mmusante if (err) 293 4082 mmusante libdiskmgt_error(err); 294 4082 mmusante else 295 4082 mmusante vdev_error(gettext("%s is currently used by swap. " 296 4082 mmusante "Please see swap(1M).\n"), file); 297 4082 mmusante return (-1); 298 4082 mmusante } 299 789 ahrens 300 789 ahrens if ((fd = open(file, O_RDONLY)) < 0) 301 789 ahrens return (0); 302 789 ahrens 303 2082 eschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { 304 1352 eschrock const char *desc; 305 1352 eschrock 306 1352 eschrock switch (state) { 307 1352 eschrock case POOL_STATE_ACTIVE: 308 1352 eschrock desc = gettext("active"); 309 1352 eschrock break; 310 1352 eschrock 311 1352 eschrock case POOL_STATE_EXPORTED: 312 1352 eschrock desc = gettext("exported"); 313 1352 eschrock break; 314 1352 eschrock 315 1352 eschrock case POOL_STATE_POTENTIALLY_ACTIVE: 316 1352 eschrock desc = gettext("potentially active"); 317 1352 eschrock break; 318 1352 eschrock 319 1352 eschrock default: 320 1352 eschrock desc = gettext("unknown"); 321 1352 eschrock break; 322 1352 eschrock } 323 1352 eschrock 324 2082 eschrock /* 325 2082 eschrock * Allow hot spares to be shared between pools. 326 2082 eschrock */ 327 2082 eschrock if (state == POOL_STATE_SPARE && isspare) 328 2082 eschrock return (0); 329 2082 eschrock 330 2082 eschrock if (state == POOL_STATE_ACTIVE || 331 2082 eschrock state == POOL_STATE_SPARE || !force) { 332 2082 eschrock switch (state) { 333 2082 eschrock case POOL_STATE_SPARE: 334 2082 eschrock vdev_error(gettext("%s is reserved as a hot " 335 2082 eschrock "spare for pool %s\n"), file, name); 336 2082 eschrock break; 337 2082 eschrock default: 338 2082 eschrock vdev_error(gettext("%s is part of %s pool " 339 2082 eschrock "'%s'\n"), file, desc, name); 340 2082 eschrock break; 341 2082 eschrock } 342 789 ahrens ret = -1; 343 789 ahrens } 344 789 ahrens 345 789 ahrens free(name); 346 789 ahrens } 347 789 ahrens 348 789 ahrens (void) close(fd); 349 789 ahrens return (ret); 350 789 ahrens } 351 789 ahrens 352 4276 taylor 353 4276 taylor /* 354 4276 taylor * By "whole disk" we mean an entire physical disk (something we can 355 4276 taylor * label, toggle the write cache on, etc.) as opposed to the full 356 4276 taylor * capacity of a pseudo-device such as lofi or did. We act as if we 357 4276 taylor * are labeling the disk, which should be a pretty good test of whether 358 4276 taylor * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if 359 4276 taylor * it isn't. 360 4276 taylor */ 361 2082 eschrock static boolean_t 362 4276 taylor is_whole_disk(const char *arg) 363 789 ahrens { 364 4276 taylor struct dk_gpt *label; 365 4276 taylor int fd; 366 4276 taylor char path[MAXPATHLEN]; 367 789 ahrens 368 4276 taylor (void) snprintf(path, sizeof (path), "%s%s%s", 369 4276 taylor RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); 370 4276 taylor if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) 371 4276 taylor return (B_FALSE); 372 4276 taylor if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { 373 4276 taylor (void) close(fd); 374 4276 taylor return (B_FALSE); 375 4276 taylor } 376 4276 taylor efi_free(label); 377 4276 taylor (void) close(fd); 378 4276 taylor return (B_TRUE); 379 789 ahrens } 380 789 ahrens 381 789 ahrens /* 382 789 ahrens * Create a leaf vdev. Determine if this is a file or a device. If it's a 383 789 ahrens * device, fill in the device id to make a complete nvlist. Valid forms for a 384 789 ahrens * leaf vdev are: 385 789 ahrens * 386 789 ahrens * /dev/dsk/xxx Complete disk path 387 789 ahrens * /xxx Full path to file 388 789 ahrens * xxx Shorthand for /dev/dsk/xxx 389 789 ahrens */ 390 4276 taylor static nvlist_t * 391 4527 perrin make_leaf_vdev(const char *arg, uint64_t is_log) 392 789 ahrens { 393 789 ahrens char path[MAXPATHLEN]; 394 789 ahrens struct stat64 statbuf; 395 789 ahrens nvlist_t *vdev = NULL; 396 789 ahrens char *type = NULL; 397 2082 eschrock boolean_t wholedisk = B_FALSE; 398 789 ahrens 399 789 ahrens /* 400 789 ahrens * Determine what type of vdev this is, and put the full path into 401 789 ahrens * 'path'. We detect whether this is a device of file afterwards by 402 789 ahrens * checking the st_mode of the file. 403 789 ahrens */ 404 789 ahrens if (arg[0] == '/') { 405 789 ahrens /* 406 789 ahrens * Complete device or file path. Exact type is determined by 407 789 ahrens * examining the file descriptor afterwards. 408 789 ahrens */ 409 4276 taylor wholedisk = is_whole_disk(arg); 410 4276 taylor if (!wholedisk && (stat64(arg, &statbuf) != 0)) { 411 789 ahrens (void) fprintf(stderr, 412 789 ahrens gettext("cannot open '%s': %s\n"), 413 789 ahrens arg, strerror(errno)); 414 789 ahrens return (NULL); 415 789 ahrens } 416 789 ahrens 417 789 ahrens (void) strlcpy(path, arg, sizeof (path)); 418 789 ahrens } else { 419 789 ahrens /* 420 789 ahrens * This may be a short path for a device, or it could be total 421 789 ahrens * gibberish. Check to see if it's a known device in 422 789 ahrens * /dev/dsk/. As part of this check, see if we've been given a 423 789 ahrens * an entire disk (minus the slice number). 424 789 ahrens */ 425 789 ahrens (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, 426 789 ahrens arg); 427 4276 taylor wholedisk = is_whole_disk(path); 428 4276 taylor if (!wholedisk && (stat64(path, &statbuf) != 0)) { 429 789 ahrens /* 430 789 ahrens * If we got ENOENT, then the user gave us 431 789 ahrens * gibberish, so try to direct them with a 432 789 ahrens * reasonable error message. Otherwise, 433 789 ahrens * regurgitate strerror() since it's the best we 434 789 ahrens * can do. 435 789 ahrens */ 436 789 ahrens if (errno == ENOENT) { 437 789 ahrens (void) fprintf(stderr, 438 789 ahrens gettext("cannot open '%s': no such " 439 789 ahrens "device in %s\n"), arg, DISK_ROOT); 440 789 ahrens (void) fprintf(stderr, 441 789 ahrens gettext("must be a full path or " 442 789 ahrens "shorthand device name\n")); 443 789 ahrens return (NULL); 444 789 ahrens } else { 445 789 ahrens (void) fprintf(stderr, 446 789 ahrens gettext("cannot open '%s': %s\n"), 447 789 ahrens path, strerror(errno)); 448 789 ahrens return (NULL); 449 789 ahrens } 450 789 ahrens } 451 789 ahrens } 452 789 ahrens 453 789 ahrens /* 454 789 ahrens * Determine whether this is a device or a file. 455 789 ahrens */ 456 4276 taylor if (wholedisk || S_ISBLK(statbuf.st_mode)) { 457 789 ahrens type = VDEV_TYPE_DISK; 458 789 ahrens } else if (S_ISREG(statbuf.st_mode)) { 459 789 ahrens type = VDEV_TYPE_FILE; 460 789 ahrens } else { 461 789 ahrens (void) fprintf(stderr, gettext("cannot use '%s': must be a " 462 789 ahrens "block device or regular file\n"), path); 463 789 ahrens return (NULL); 464 789 ahrens } 465 789 ahrens 466 789 ahrens /* 467 789 ahrens * Finally, we have the complete device or file, and we know that it is 468 789 ahrens * acceptable to use. Construct the nvlist to describe this vdev. All 469 789 ahrens * vdevs have a 'path' element, and devices also have a 'devid' element. 470 789 ahrens */ 471 789 ahrens verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0); 472 789 ahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); 473 789 ahrens verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); 474 4527 perrin verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0); 475 1171 eschrock if (strcmp(type, VDEV_TYPE_DISK) == 0) 476 1171 eschrock verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, 477 1171 eschrock (uint64_t)wholedisk) == 0); 478 789 ahrens 479 789 ahrens /* 480 789 ahrens * For a whole disk, defer getting its devid until after labeling it. 481 789 ahrens */ 482 789 ahrens if (S_ISBLK(statbuf.st_mode) && !wholedisk) { 483 789 ahrens /* 484 789 ahrens * Get the devid for the device. 485 789 ahrens */ 486 789 ahrens int fd; 487 789 ahrens ddi_devid_t devid; 488 789 ahrens char *minor = NULL, *devid_str = NULL; 489 789 ahrens 490 789 ahrens if ((fd = open(path, O_RDONLY)) < 0) { 491 789 ahrens (void) fprintf(stderr, gettext("cannot open '%s': " 492 789 ahrens "%s\n"), path, strerror(errno)); 493 789 ahrens nvlist_free(vdev); 494 789 ahrens return (NULL); 495 789 ahrens } 496 789 ahrens 497 789 ahrens if (devid_get(fd, &devid) == 0) { 498 789 ahrens if (devid_get_minor_name(fd, &minor) == 0 && 499 789 ahrens (devid_str = devid_str_encode(devid, minor)) != 500 789 ahrens NULL) { 501 789 ahrens verify(nvlist_add_string(vdev, 502 789 ahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 503 789 ahrens } 504 789 ahrens if (devid_str != NULL) 505 789 ahrens devid_str_free(devid_str); 506 789 ahrens if (minor != NULL) 507 789 ahrens devid_str_free(minor); 508 789 ahrens devid_free(devid); 509 789 ahrens } 510 789 ahrens 511 789 ahrens (void) close(fd); 512 789 ahrens } 513 789 ahrens 514 789 ahrens return (vdev); 515 789 ahrens } 516 789 ahrens 517 789 ahrens /* 518 789 ahrens * Go through and verify the replication level of the pool is consistent. 519 789 ahrens * Performs the following checks: 520 789 ahrens * 521 789 ahrens * For the new spec, verifies that devices in mirrors and raidz are the 522 789 ahrens * same size. 523 789 ahrens * 524 789 ahrens * If the current configuration already has inconsistent replication 525 789 ahrens * levels, ignore any other potential problems in the new spec. 526 789 ahrens * 527 789 ahrens * Otherwise, make sure that the current spec (if there is one) and the new 528 789 ahrens * spec have consistent replication levels. 529 789 ahrens */ 530 789 ahrens typedef struct replication_level { 531 2082 eschrock char *zprl_type; 532 2082 eschrock uint64_t zprl_children; 533 2082 eschrock uint64_t zprl_parity; 534 789 ahrens } replication_level_t; 535 789 ahrens 536 4276 taylor #define ZPOOL_FUZZ (16 * 1024 * 1024) 537 4276 taylor 538 789 ahrens /* 539 789 ahrens * Given a list of toplevel vdevs, return the current replication level. If 540 789 ahrens * the config is inconsistent, then NULL is returned. If 'fatal' is set, then 541 789 ahrens * an error message will be displayed for each self-inconsistent vdev. 542 789 ahrens */ 543 4276 taylor static replication_level_t * 544 2082 eschrock get_replication(nvlist_t *nvroot, boolean_t fatal) 545 789 ahrens { 546 789 ahrens nvlist_t **top; 547 789 ahrens uint_t t, toplevels; 548 789 ahrens nvlist_t **child; 549 789 ahrens uint_t c, children; 550 789 ahrens nvlist_t *nv; 551 789 ahrens char *type; 552 789 ahrens replication_level_t lastrep, rep, *ret; 553 2082 eschrock boolean_t dontreport; 554 789 ahrens 555 789 ahrens ret = safe_malloc(sizeof (replication_level_t)); 556 789 ahrens 557 789 ahrens verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 558 789 ahrens &top, &toplevels) == 0); 559 789 ahrens 560 2082 eschrock lastrep.zprl_type = NULL; 561 789 ahrens for (t = 0; t < toplevels; t++) { 562 4527 perrin uint64_t is_log = B_FALSE; 563 4527 perrin 564 789 ahrens nv = top[t]; 565 789 ahrens 566 4527 perrin /* 567 4527 perrin * For separate logs we ignore the top level vdev replication 568 4527 perrin * constraints. 569 4527 perrin */ 570 4527 perrin (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); 571 4527 perrin if (is_log) 572 4527 perrin continue; 573 4527 perrin 574 4527 perrin verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, 575 4527 perrin &type) == 0); 576 789 ahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 577 789 ahrens &child, &children) != 0) { 578 789 ahrens /* 579 789 ahrens * This is a 'file' or 'disk' vdev. 580 789 ahrens */ 581 2082 eschrock rep.zprl_type = type; 582 2082 eschrock rep.zprl_children = 1; 583 2082 eschrock rep.zprl_parity = 0; 584 789 ahrens } else { 585 789 ahrens uint64_t vdev_size; 586 789 ahrens 587 789 ahrens /* 588 789 ahrens * This is a mirror or RAID-Z vdev. Go through and make 589 789 ahrens * sure the contents are all the same (files vs. disks), 590 789 ahrens * keeping track of the number of elements in the 591 789 ahrens * process. 592 789 ahrens * 593 789 ahrens * We also check that the size of each vdev (if it can 594 789 ahrens * be determined) is the same. 595 789 ahrens */ 596 2082 eschrock rep.zprl_type = type; 597 2082 eschrock rep.zprl_children = 0; 598 2082 eschrock 599 2082 eschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 600 2082 eschrock verify(nvlist_lookup_uint64(nv, 601 2082 eschrock ZPOOL_CONFIG_NPARITY, 602 2082 eschrock &rep.zprl_parity) == 0); 603 2082 eschrock assert(rep.zprl_parity != 0); 604 2082 eschrock } else { 605 2082 eschrock rep.zprl_parity = 0; 606 2082 eschrock } 607 789 ahrens 608 789 ahrens /* 609 4527 perrin * The 'dontreport' variable indicates that we've 610 789 ahrens * already reported an error for this spec, so don't 611 789 ahrens * bother doing it again. 612 789 ahrens */ 613 789 ahrens type = NULL; 614 789 ahrens dontreport = 0; 615 789 ahrens vdev_size = -1ULL; 616 789 ahrens for (c = 0; c < children; c++) { 617 789 ahrens nvlist_t *cnv = child[c]; 618 789 ahrens char *path; 619 789 ahrens struct stat64 statbuf; 620 789 ahrens uint64_t size = -1ULL; 621 789 ahrens char *childtype; 622 789 ahrens int fd, err; 623 789 ahrens 624 2082 eschrock rep.zprl_children++; 625 789 ahrens 626 789 ahrens verify(nvlist_lookup_string(cnv, 627 789 ahrens ZPOOL_CONFIG_TYPE, &childtype) == 0); 628 2142 eschrock 629 2142 eschrock /* 630 4527 perrin * If this is a replacing or spare vdev, then 631 2142 eschrock * get the real first child of the vdev. 632 2142 eschrock */ 633 2142 eschrock if (strcmp(childtype, 634 2142 eschrock VDEV_TYPE_REPLACING) == 0 || 635 2142 eschrock strcmp(childtype, VDEV_TYPE_SPARE) == 0) { 636 2142 eschrock nvlist_t **rchild; 637 2142 eschrock uint_t rchildren; 638 2142 eschrock 639 2142 eschrock verify(nvlist_lookup_nvlist_array(cnv, 640 2142 eschrock ZPOOL_CONFIG_CHILDREN, &rchild, 641 2142 eschrock &rchildren) == 0); 642 2142 eschrock assert(rchildren == 2); 643 2142 eschrock cnv = rchild[0]; 644 2142 eschrock 645 2142 eschrock verify(nvlist_lookup_string(cnv, 646 2142 eschrock ZPOOL_CONFIG_TYPE, 647 2142 eschrock &childtype) == 0); 648 2142 eschrock } 649 2142 eschrock 650 789 ahrens verify(nvlist_lookup_string(cnv, 651 789 ahrens ZPOOL_CONFIG_PATH, &path) == 0); 652 789 ahrens 653 789 ahrens /* 654 789 ahrens * If we have a raidz/mirror that combines disks 655 789 ahrens * with files, report it as an error. 656 789 ahrens */ 657 789 ahrens if (!dontreport && type != NULL && 658 789 ahrens strcmp(type, childtype) != 0) { 659 789 ahrens if (ret != NULL) 660 789 ahrens free(ret); 661 789 ahrens ret = NULL; 662 789 ahrens if (fatal) 663 789 ahrens vdev_error(gettext( 664 789 ahrens "mismatched replication " 665 789 ahrens "level: %s contains both " 666 789 ahrens "files and devices\n"), 667 2082 eschrock rep.zprl_type); 668 789 ahrens else 669 789 ahrens return (NULL); 670 2082 eschrock dontreport = B_TRUE; 671 789 ahrens } 672 789 ahrens 673 789 ahrens /* 674 789 ahrens * According to stat(2), the value of 'st_size' 675 789 ahrens * is undefined for block devices and character 676 789 ahrens * devices. But there is no effective way to 677 789 ahrens * determine the real size in userland. 678 789 ahrens * 679 789 ahrens * Instead, we'll take advantage of an 680 789 ahrens * implementation detail of spec_size(). If the 681 789 ahrens * device is currently open, then we (should) 682 789 ahrens * return a valid size. 683 789 ahrens * 684 789 ahrens * If we still don't get a valid size (indicated 685 789 ahrens * by a size of 0 or MAXOFFSET_T), then ignore 686 789 ahrens * this device altogether. 687 789 ahrens */ 688 789 ahrens if ((fd = open(path, O_RDONLY)) >= 0) { 689 789 ahrens err = fstat64(fd, &statbuf); 690 789 ahrens (void) close(fd); 691 789 ahrens } else { 692 789 ahrens err = stat64(path, &statbuf); 693 789 ahrens } 694 789 ahrens 695 789 ahrens if (err != 0 || 696 789 ahrens statbuf.st_size == 0 || 697 789 ahrens statbuf.st_size == MAXOFFSET_T) 698 789 ahrens continue; 699 789 ahrens 700 789 ahrens size = statbuf.st_size; 701 789 ahrens 702 789 ahrens /* 703 4276 taylor * Also make sure that devices and 704 4276 taylor * slices have a consistent size. If 705 4276 taylor * they differ by a significant amount 706 4276 taylor * (~16MB) then report an error. 707 789 ahrens */ 708 4276 taylor if (!dontreport && 709 4276 taylor (vdev_size != -1ULL && 710 4276 taylor (labs(size - vdev_size) > 711 4276 taylor ZPOOL_FUZZ))) { 712 789 ahrens if (ret != NULL) 713 789 ahrens free(ret); 714 789 ahrens ret = NULL; 715 789 ahrens if (fatal) 716 789 ahrens vdev_error(gettext( 717 789 ahrens "%s contains devices of " 718 789 ahrens "different sizes\n"), 719 2082 eschrock rep.zprl_type); 720 789 ahrens else 721 789 ahrens return (NULL); 722 2082 eschrock dontreport = B_TRUE; 723 789 ahrens } 724 789 ahrens 725 789 ahrens type = childtype; 726 789 ahrens vdev_size = size; 727 789 ahrens } 728 789 ahrens } 729 789 ahrens 730 789 ahrens /* 731 789 ahrens * At this point, we have the replication of the last toplevel 732 789 ahrens * vdev in 'rep'. Compare it to 'lastrep' to see if its 733 789 ahrens * different. 734 789 ahrens */ 735 2082 eschrock if (lastrep.zprl_type != NULL) { 736 2082 eschrock if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) { 737 789 ahrens if (ret != NULL) 738 789 ahrens free(ret); 739 789 ahrens ret = NULL; 740 789 ahrens if (fatal) 741 789 ahrens vdev_error(gettext( 742 2082 eschrock "mismatched replication level: " 743 2082 eschrock "both %s and %s vdevs are " 744 789 ahrens "present\n"), 745 2082 eschrock lastrep.zprl_type, rep.zprl_type); 746 789 ahrens else 747 789 ahrens return (NULL); 748 2082 eschrock } else if (lastrep.zprl_parity != rep.zprl_parity) { 749 789 ahrens if (ret) 750 789 ahrens free(ret); 751 789 ahrens ret = NULL; 752 789 ahrens if (fatal) 753 789 ahrens vdev_error(gettext( 754 2082 eschrock "mismatched replication level: " 755 2082 eschrock "both %llu and %llu device parity " 756 2082 eschrock "%s vdevs are present\n"), 757 2082 eschrock lastrep.zprl_parity, 758 2082 eschrock rep.zprl_parity, 759 2082 eschrock rep.zprl_type); 760 2082 eschrock else 761 2082 eschrock return (NULL); 762 2082 eschrock } else if (lastrep.zprl_children != rep.zprl_children) { 763 2082 eschrock if (ret) 764 2082 eschrock free(ret); 765 2082 eschrock ret = NULL; 766 2082 eschrock if (fatal) 767 2082 eschrock vdev_error(gettext( 768 2082 eschrock "mismatched replication level: " 769 2082 eschrock "both %llu-way and %llu-way %s " 770 789 ahrens "vdevs are present\n"), 771 2082 eschrock lastrep.zprl_children, 772 2082 eschrock rep.zprl_children, 773 2082 eschrock rep.zprl_type); 774 789 ahrens else 775 789 ahrens return (NULL); 776 789 ahrens } 777 789 ahrens } 778 789 ahrens lastrep = rep; 779 789 ahrens } 780 789 ahrens 781 2082 eschrock if (ret != NULL) 782 2082 eschrock *ret = rep; 783 789 ahrens 784 789 ahrens return (ret); 785 789 ahrens } 786 789 ahrens 787 789 ahrens /* 788 789 ahrens * Check the replication level of the vdev spec against the current pool. Calls 789 789 ahrens * get_replication() to make sure the new spec is self-consistent. If the pool 790 789 ahrens * has a consistent replication level, then we ignore any errors. Otherwise, 791 789 ahrens * report any difference between the two. 792 789 ahrens */ 793 4276 taylor static int 794 789 ahrens check_replication(nvlist_t *config, nvlist_t *newroot) 795 789 ahrens { 796 4276 taylor nvlist_t **child; 797 4276 taylor uint_t children; 798 789 ahrens replication_level_t *current = NULL, *new; 799 789 ahrens int ret; 800 789 ahrens 801 789 ahrens /* 802 789 ahrens * If we have a current pool configuration, check to see if it's 803 789 ahrens * self-consistent. If not, simply return success. 804 789 ahrens */ 805 789 ahrens if (config != NULL) { 806 789 ahrens nvlist_t *nvroot; 807 789 ahrens 808 789 ahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 809 789 ahrens &nvroot) == 0); 810 2082 eschrock if ((current = get_replication(nvroot, B_FALSE)) == NULL) 811 789 ahrens return (0); 812 4276 taylor } 813 4276 taylor /* 814 4276 taylor * for spares there may be no children, and therefore no 815 4276 taylor * replication level to check 816 4276 taylor */ 817 4276 taylor if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN, 818 4276 taylor &child, &children) != 0) || (children == 0)) { 819 4527 perrin free(current); 820 4527 perrin return (0); 821 4527 perrin } 822 4527 perrin 823 4527 perrin /* 824 4527 perrin * If all we have is logs then there's no replication level to check. 825 4527 perrin */ 826 4527 perrin if (num_logs(newroot) == children) { 827 4276 taylor free(current); 828 4276 taylor return (0); 829 789 ahrens } 830 789 ahrens 831 789 ahrens /* 832 789 ahrens * Get the replication level of the new vdev spec, reporting any 833 789 ahrens * inconsistencies found. 834 789 ahrens */ 835 2082 eschrock if ((new = get_replication(newroot, B_TRUE)) == NULL) { 836 789 ahrens free(current); 837 789 ahrens return (-1); 838 789 ahrens } 839 789 ahrens 840 789 ahrens /* 841 789 ahrens * Check to see if the new vdev spec matches the replication level of 842 789 ahrens * the current pool. 843 789 ahrens */ 844 789 ahrens ret = 0; 845 789 ahrens if (current != NULL) { 846 2082 eschrock if (strcmp(current->zprl_type, new->zprl_type) != 0) { 847 789 ahrens vdev_error(gettext( 848 2082 eschrock "mismatched replication level: pool uses %s " 849 2082 eschrock "and new vdev is %s\n"), 850 2082 eschrock current->zprl_type, new->zprl_type); 851 2082 eschrock ret = -1; 852 2082 eschrock } else if (current->zprl_parity != new->zprl_parity) { 853 2082 eschrock vdev_error(gettext( 854 2082 eschrock "mismatched replication level: pool uses %llu " 855 2082 eschrock "device parity and new vdev uses %llu\n"), 856 2082 eschrock current->zprl_parity, new->zprl_parity); 857 2082 eschrock ret = -1; 858 2082 eschrock } else if (current->zprl_children != new->zprl_children) { 859 2082 eschrock vdev_error(gettext( 860 2082 eschrock "mismatched replication level: pool uses %llu-way " 861 2082 eschrock "%s and new vdev uses %llu-way %s\n"), 862 2082 eschrock current->zprl_children, current->zprl_type, 863 2082 eschrock new->zprl_children, new->zprl_type); 864 789 ahrens ret = -1; 865 789 ahrens } 866 789 ahrens } 867 789 ahrens 868 789 ahrens free(new); 869 789 ahrens if (current != NULL) 870 789 ahrens free(current); 871 789 ahrens 872 789 ahrens return (ret); 873 789 ahrens } 874 789 ahrens 875 789 ahrens /* 876 789 ahrens * Go through and find any whole disks in the vdev specification, labelling them 877 789 ahrens * as appropriate. When constructing the vdev spec, we were unable to open this 878 789 ahrens * device in order to provide a devid. Now that we have labelled the disk and 879 789 ahrens * know that slice 0 is valid, we can construct the devid now. 880 789 ahrens * 881 4276 taylor * If the disk was already labeled with an EFI label, we will have gotten the 882 789 ahrens * devid already (because we were able to open the whole disk). Otherwise, we 883 789 ahrens * need to get the devid after we label the disk. 884 789 ahrens */ 885 4276 taylor static int 886 4276 taylor make_disks(zpool_handle_t *zhp, nvlist_t *nv) 887 789 ahrens { 888 789 ahrens nvlist_t **child; 889 789 ahrens uint_t c, children; 890 789 ahrens char *type, *path, *diskname; 891 789 ahrens char buf[MAXPATHLEN]; 892 1171 eschrock uint64_t wholedisk; 893 789 ahrens int fd; 894 789 ahrens int ret; 895 789 ahrens ddi_devid_t devid; 896 789 ahrens char *minor = NULL, *devid_str = NULL; 897 789 ahrens 898 789 ahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 899 789 ahrens 900 789 ahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 901 789 ahrens &child, &children) != 0) { 902 789 ahrens 903 789 ahrens if (strcmp(type, VDEV_TYPE_DISK) != 0) 904 789 ahrens return (0); 905 789 ahrens 906 789 ahrens /* 907 789 ahrens * We have a disk device. Get the path to the device 908 4276 taylor * and see if it's a whole disk by appending the backup 909 789 ahrens * slice and stat()ing the device. 910 789 ahrens */ 911 789 ahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 912 1171 eschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 913 1171 eschrock &wholedisk) != 0 || !wholedisk) 914 789 ahrens return (0); 915 789 ahrens 916 789 ahrens diskname = strrchr(path, '/'); 917 789 ahrens assert(diskname != NULL); 918 789 ahrens diskname++; 919 4276 taylor if (zpool_label_disk(g_zfs, zhp, diskname) == -1) 920 789 ahrens return (-1); 921 789 ahrens 922 789 ahrens /* 923 789 ahrens * Fill in the devid, now that we've labeled the disk. 924 789 ahrens */ 925 789 ahrens (void) snprintf(buf, sizeof (buf), "%ss0", path); 926 789 ahrens if ((fd = open(buf, O_RDONLY)) < 0) { 927 789 ahrens (void) fprintf(stderr, 928 789 ahrens gettext("cannot open '%s': %s\n"), 929 789 ahrens buf, strerror(errno)); 930 789 ahrens return (-1); 931 789 ahrens } 932 789 ahrens 933 789 ahrens if (devid_get(fd, &devid) == 0) { 934 789 ahrens if (devid_get_minor_name(fd, &minor) == 0 && 935 789 ahrens (devid_str = devid_str_encode(devid, minor)) != 936 789 ahrens NULL) { 937 789 ahrens verify(nvlist_add_string(nv, 938 789 ahrens ZPOOL_CONFIG_DEVID, devid_str) == 0); 939 789 ahrens } 940 789 ahrens if (devid_str != NULL) 941 789 ahrens devid_str_free(devid_str); 942 789 ahrens if (minor != NULL) 943 789 ahrens devid_str_free(minor); 944 789 ahrens devid_free(devid); 945 789 ahrens } 946 1171 eschrock 947 1171 eschrock /* 948 1171 eschrock * Update the path to refer to the 's0' slice. The presence of 949 1171 eschrock * the 'whole_disk' field indicates to the CLI that we should 950 1171 eschrock * chop off the slice number when displaying the device in 951 1171 eschrock * future output. 952 1171 eschrock */ 953 1171 eschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); 954 789 ahrens 955 789 ahrens (void) close(fd); 956 789 ahrens 957 789 ahrens return (0); 958 789 ahrens } 959 789 ahrens 960 789 ahrens for (c = 0; c < children; c++) 961 4276 taylor if ((ret = make_disks(zhp, child[c])) != 0) 962 789 ahrens return (ret); 963 789 ahrens 964 2082 eschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 965 2082 eschrock &child, &children) == 0) 966 2082 eschrock for (c = 0; c < children; c++) 967 4276 taylor if ((ret = make_disks(zhp, child[c])) != 0) 968 2082 eschrock return (ret); 969 2082 eschrock 970 5450 brendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 971 5450 brendan &child, &children) == 0) 972 5450 brendan for (c = 0; c < children; c++) 973 5450 brendan if ((ret = make_disks(zhp, child[c])) != 0) 974 5450 brendan return (ret); 975 5450 brendan 976 789 ahrens return (0); 977 2082 eschrock } 978 2082 eschrock 979 2082 eschrock /* 980 2082 eschrock * Determine if the given path is a hot spare within the given configuration. 981 2082 eschrock */ 982 2082 eschrock static boolean_t 983 2082 eschrock is_spare(nvlist_t *config, const char *path) 984 2082 eschrock { 985 2082 eschrock int fd; 986 2082 eschrock pool_state_t state; 987 3265 ahrens char *name = NULL; 988 2082 eschrock nvlist_t *label; 989 2082 eschrock uint64_t guid, spareguid; 990 2082 eschrock nvlist_t *nvroot; 991 2082 eschrock nvlist_t **spares; 992 2082 eschrock uint_t i, nspares; 993 2082 eschrock boolean_t inuse; 994 2082 eschrock 995 2082 eschrock if ((fd = open(path, O_RDONLY)) < 0) 996 2082 eschrock return (B_FALSE); 997 2082 eschrock 998 2082 eschrock if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || 999 2082 eschrock !inuse || 1000 2082 eschrock state != POOL_STATE_SPARE || 1001 2082 eschrock zpool_read_label(fd, &label) != 0) { 1002 3265 ahrens free(name); 1003 2082 eschrock (void) close(fd); 1004 2082 eschrock return (B_FALSE); 1005 2082 eschrock } 1006 3265 ahrens free(name); 1007 2082 eschrock 1008 2082 eschrock (void) close(fd); 1009 2082 eschrock verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0); 1010 2082 eschrock nvlist_free(label); 1011 2082 eschrock 1012 2082 eschrock verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 1013 2082 eschrock &nvroot) == 0); 1014 2082 eschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 1015 2082 eschrock &spares, &nspares) == 0) { 1016 2082 eschrock for (i = 0; i < nspares; i++) { 1017 2082 eschrock verify(nvlist_lookup_uint64(spares[i], 1018 2082 eschrock ZPOOL_CONFIG_GUID, &spareguid) == 0); 1019 2082 eschrock if (spareguid == guid) 1020 2082 eschrock return (B_TRUE); 1021 2082 eschrock } 1022 2082 eschrock } 1023 2082 eschrock 1024 2082 eschrock return (B_FALSE); 1025 789 ahrens } 1026 789 ahrens 1027 789 ahrens /* 1028 789 ahrens * Go through and find any devices that are in use. We rely on libdiskmgt for 1029 789 ahrens * the majority of this task. 1030 789 ahrens */ 1031 4276 taylor static int 1032 2082 eschrock check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing, 1033 2082 eschrock int isspare) 1034 789 ahrens { 1035 789 ahrens nvlist_t **child; 1036 789 ahrens uint_t c, children; 1037 789 ahrens char *type, *path; 1038 789 ahrens int ret; 1039 2082 eschrock char buf[MAXPATHLEN]; 1040 2082 eschrock uint64_t wholedisk; 1041 789 ahrens 1042 789 ahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); 1043 789 ahrens 1044 789 ahrens if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1045 789 ahrens &child, &children) != 0) { 1046 789 ahrens 1047 789 ahrens verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); 1048 789 ahrens 1049 2082 eschrock /* 1050 2082 eschrock * As a generic check, we look to see if this is a replace of a 1051 2082 eschrock * hot spare within the same pool. If so, we allow it 1052 2082 eschrock * regardless of what libdiskmgt or zpool_in_use() says. 1053 2082 eschrock */ 1054 2082 eschrock if (isreplacing) { 1055 2082 eschrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 1056 2082 eschrock &wholedisk) == 0 && wholedisk) 1057 2082 eschrock (void) snprintf(buf, sizeof (buf), "%ss0", 1058 2082 eschrock path); 1059 2082 eschrock else 1060 2082 eschrock (void) strlcpy(buf, path, sizeof (buf)); 1061 2082 eschrock if (is_spare(config, buf)) 1062 2082 eschrock return (0); 1063 2082 eschrock } 1064 2082 eschrock 1065 789 ahrens if (strcmp(type, VDEV_TYPE_DISK) == 0) 1066 2082 eschrock ret = check_device(path, force, isspare); 1067 789 ahrens 1068 789 ahrens if (strcmp(type, VDEV_TYPE_FILE) == 0) 1069 2082 eschrock ret = check_file(path, force, isspare); 1070 789 ahrens 1071 789 ahrens return (ret); 1072 789 ahrens } 1073 789 ahrens 1074 789 ahrens for (c = 0; c < children; c++) 1075 2082 eschrock if ((ret = check_in_use(config, child[c], force, 1076 2082 eschrock isreplacing, B_FALSE)) != 0) 1077 789 ahrens return (ret); 1078 789 ahrens 1079 2082 eschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, 1080 2082 eschrock &child, &children) == 0) 1081 2082 eschrock for (c = 0; c < children; c++) 1082 2082 eschrock if ((ret = check_in_use(config, child[c], force, 1083 2082 eschrock isreplacing, B_TRUE)) != 0) 1084 2082 eschrock return (ret); 1085 5450 brendan 1086 5450 brendan if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 1087 5450 brendan &child, &children) == 0) 1088 5450 brendan for (c = 0; c < children; c++) 1089 5450 brendan if ((ret = check_in_use(config, child[c], force, 1090 5450 brendan isreplacing, B_FALSE)) != 0) 1091 5450 brendan return (ret); 1092 5450 brendan 1093 789 ahrens return (0); 1094 2082 eschrock } 1095 2082 eschrock 1096 4276 taylor static const char * 1097 10105 adam is_grouping(const char *type, int *mindev, int *maxdev) 1098 2082 eschrock { 1099 10105 adam if (strncmp(type, "raidz", 5) == 0) { 1100 10105 adam const char *p = type + 5; 1101 10105 adam char *end; 1102 10105 adam long nparity; 1103 10105 adam 1104 10105 adam if (*p == '\0') { 1105 10105 adam nparity = 1; 1106 10105 adam } else if (*p == '0') { 1107 10105 adam return (NULL); /* no zero prefixes allowed */ 1108 10105 adam } else { 1109 10105 adam errno = 0; 1110 10105 adam nparity = strtol(p, &end, 10); 1111 10105 adam if (errno != 0 || nparity < 1 || nparity >= 255 || 1112 10105 adam *end != '\0') 1113 10105 adam return (NULL); 1114 10105 adam } 1115 10105 adam 1116 2082 eschrock if (mindev != NULL) 1117 10105 adam *mindev = nparity + 1; 1118 10105 adam if (maxdev != NULL) 1119 10105 adam *maxdev = 255; 1120 2082 eschrock return (VDEV_TYPE_RAIDZ); 1121 2082 eschrock } 1122 2082 eschrock 1123 10105 adam if (maxdev != NULL) 1124 10105 adam *maxdev = INT_MAX; 1125 2082 eschrock 1126 2082 eschrock if (strcmp(type, "mirror") == 0) { 1127 2082 eschrock if (mindev != NULL) 1128 2082 eschrock *mindev = 2; 1129 2082 eschrock return (VDEV_TYPE_MIRROR); 1130 2082 eschrock } 1131 2082 eschrock 1132 2082 eschrock if (strcmp(type, "spare") == 0) { 1133 2082 eschrock if (mindev != NULL) 1134 2082 eschrock *mindev = 1; 1135 2082 eschrock return (VDEV_TYPE_SPARE); 1136 2082 eschrock } 1137 2082 eschrock 1138 4527 perrin if (strcmp(type, "log") == 0) { 1139 4527 perrin if (mindev != NULL) 1140 4527 perrin *mindev = 1; 1141 4527 perrin return (VDEV_TYPE_LOG); 1142 4527 perrin } 1143 4527 perrin 1144 5450 brendan if (strcmp(type, "cache") == 0) { 1145 5450 brendan if (mindev != NULL) 1146 5450 brendan *mindev = 1; 1147 5450 brendan return (VDEV_TYPE_L2CACHE); 1148 5450 brendan } 1149 5450 brendan 1150 2082 eschrock return (NULL); 1151 789 ahrens } 1152 789 ahrens 1153 789 ahrens /* 1154 789 ahrens * Construct a syntactically valid vdev specification, 1155 789 ahrens * and ensure that all devices and files exist and can be opened. 1156 789 ahrens * Note: we don't bother freeing anything in the error paths 1157 789 ahrens * because the program is just going to exit anyway. 1158 789 ahrens */ 1159 789 ahrens nvlist_t * 1160 789 ahrens construct_spec(int argc, char **argv) 1161 789 ahrens { 1162 5450 brendan nvlist_t *nvroot, *nv, **top, **spares, **l2cache; 1163 10105 adam int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; 1164 2082 eschrock const char *type; 1165 4527 perrin uint64_t is_log; 1166 4527 perrin boolean_t seen_logs; 1167 789 ahrens 1168 789 ahrens top = NULL; 1169 789 ahrens toplevels = 0; 1170 2082 eschrock spares = NULL; 1171 5450 brendan l2cache = NULL; 1172 2082 eschrock nspares = 0; 1173 4527 perrin nlogs = 0; 1174 5450 brendan nl2cache = 0; 1175 4527 perrin is_log = B_FALSE; 1176 4527 perrin seen_logs = B_FALSE; 1177 789 ahrens 1178 789 ahrens while (argc > 0) { 1179 789 ahrens nv = NULL; 1180 789 ahrens 1181 789 ahrens /* 1182 789 ahrens * If it's a mirror or raidz, the subsequent arguments are 1183 789 ahrens * its leaves -- until we encounter the next mirror or raidz. 1184 789 ahrens */ 1185 10105 adam if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) { 1186 2082 eschrock nvlist_t **child = NULL; 1187 2082 eschrock int c, children = 0; 1188 789 ahrens 1189 4527 perrin if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 1190 4527 perrin if (spares != NULL) { 1191 4527 perrin (void) fprintf(stderr, 1192 4527 perrin gettext("invalid vdev " 1193 4527 perrin "specification: 'spare' can be " 1194 4527 perrin "specified only once\n")); 1195 4527 perrin return (NULL); 1196 4527 perrin } 1197 4527 perrin is_log = B_FALSE; 1198 4527 perrin } 1199 4527 perrin 1200 4527 perrin if (strcmp(type, VDEV_TYPE_LOG) == 0) { 1201 4527 perrin if (seen_logs) { 1202 4527 perrin (void) fprintf(stderr, 1203 4527 perrin gettext("invalid vdev " 1204 4527 perrin "specification: 'log' can be " 1205 4527 perrin "specified only once\n")); 1206 4527 perrin return (NULL); 1207 4527 perrin } 1208 4527 perrin seen_logs = B_TRUE; 1209 4527 perrin is_log = B_TRUE; 1210 4527 perrin argc--; 1211 4527 perrin argv++; 1212 4527 perrin /* 1213 4527 perrin * A log is not a real grouping device. 1214 4527 perrin * We just set is_log and continue. 1215 4527 perrin */ 1216 4527 perrin continue; 1217 4527 perrin } 1218 4527 perrin 1219 5450 brendan if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1220 5450 brendan if (l2cache != NULL) { 1221 5450 brendan (void) fprintf(stderr, 1222 5450 brendan gettext("invalid vdev " 1223 5450 brendan "specification: 'cache' can be " 1224 5450 brendan "specified only once\n")); 1225 5450 brendan return (NULL); 1226 5450 brendan } 1227 5450 brendan is_log = B_FALSE; 1228 5450 brendan } 1229 5450 brendan 1230 4527 perrin if (is_log) { 1231 4527 perrin if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { 1232 4527 perrin (void) fprintf(stderr, 1233 4527 perrin gettext("invalid vdev " 1234 4527 perrin "specification: unsupported 'log' " 1235 4527 perrin "device: %s\n"), type); 1236 4527 perrin return (NULL); 1237 4527 perrin } 1238 4527 perrin nlogs++; 1239 2082 eschrock } 1240 789 ahrens 1241 789 ahrens for (c = 1; c < argc; c++) { 1242 10105 adam if (is_grouping(argv[c], NULL, NULL) != NULL) 1243 789 ahrens break; 1244 789 ahrens children++; 1245 789 ahrens child = realloc(child, 1246 789 ahrens children * sizeof (nvlist_t *)); 1247 789 ahrens if (child == NULL) 1248 2856 nd150628 zpool_no_memory(); 1249 4527 perrin if ((nv = make_leaf_vdev(argv[c], B_FALSE)) 1250 4527 perrin == NULL) 1251 789 ahrens return (NULL); 1252 789 ahrens child[children - 1] = nv; 1253 789 ahrens } 1254 789 ahrens 1255 2082 eschrock if (children < mindev) { 1256 2082 eschrock (void) fprintf(stderr, gettext("invalid vdev " 1257 2082 eschrock "specification: %s requires at least %d " 1258 2082 eschrock "devices\n"), argv[0], mindev); 1259 10105 adam return (NULL); 1260 10105 adam } 1261 10105 adam 1262 10105 adam if (children > maxdev) { 1263 10105 adam (void) fprintf(stderr, gettext("invalid vdev " 1264 10105 adam "specification: %s supports no more than " 1265 10105 adam "%d devices\n"), argv[0], maxdev); 1266 2082 eschrock return (NULL); 1267 2082 eschrock } 1268 2082 eschrock 1269 789 ahrens argc -= c; 1270 789 ahrens argv += c; 1271 789 ahrens 1272 2082 eschrock if (strcmp(type, VDEV_TYPE_SPARE) == 0) { 1273 2082 eschrock spares = child; 1274 2082 eschrock nspares = children; 1275 5450 brendan continue; 1276 5450 brendan } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) { 1277 5450 brendan l2cache = child; 1278 5450 brendan nl2cache = children; 1279 2082 eschrock continue; 1280 2082 eschrock } else { 1281 2082 eschrock verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 1282 2082 eschrock 0) == 0); 1283 2082 eschrock verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, 1284 2082 eschrock type) == 0); 1285 4527 perrin verify(nvlist_add_uint64(nv, 1286 4527 perrin ZPOOL_CONFIG_IS_LOG, is_log) == 0); 1287 2082 eschrock if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { 1288 2082 eschrock verify(nvlist_add_uint64(nv, 1289 2082 eschrock ZPOOL_CONFIG_NPARITY, 1290 2082 eschrock mindev - 1) == 0); 1291 2082 eschrock } 1292 2082 eschrock verify(nvlist_add_nvlist_array(nv, 1293 2082 eschrock ZPOOL_CONFIG_CHILDREN, child, 1294 2082 eschrock children) == 0); 1295 2082 eschrock 1296 2082 eschrock for (c = 0; c < children; c++) 1297 2082 eschrock nvlist_free(child[c]); 1298 2082 eschrock free(child); 1299 789 ahrens } 1300 789 ahrens } else { 1301 789 ahrens /* 1302 789 ahrens * We have a device. Pass off to make_leaf_vdev() to 1303 789 ahrens * construct the appropriate nvlist describing the vdev. 1304 789 ahrens */ 1305 4527 perrin if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL) 1306 789 ahrens return (NULL); 1307 4527 perrin if (is_log) 1308 4527 perrin nlogs++; 1309 789 ahrens argc--; 1310 789 ahrens argv++; 1311 789 ahrens } 1312 789 ahrens 1313 789 ahrens toplevels++; 1314 789 ahrens top = realloc(top, toplevels * sizeof (nvlist_t *)); 1315 789 ahrens if (top == NULL) 1316 2856 nd150628 zpool_no_memory(); 1317 789 ahrens top[toplevels - 1] = nv; 1318 789 ahrens } 1319 789 ahrens 1320 5450 brendan if (toplevels == 0 && nspares == 0 && nl2cache == 0) { 1321 2082 eschrock (void) fprintf(stderr, gettext("invalid vdev " 1322 2082 eschrock "specification: at least one toplevel vdev must be " 1323 2082 eschrock "specified\n")); 1324 4527 perrin return (NULL); 1325 4527 perrin } 1326 4527 perrin 1327 4527 perrin if (seen_logs && nlogs == 0) { 1328 4527 perrin (void) fprintf(stderr, gettext("invalid vdev specification: " 1329 4527 perrin "log requires at least 1 device\n")); 1330 2082 eschrock return (NULL); 1331 2082 eschrock } 1332 2082 eschrock 1333 789 ahrens /* 1334 789 ahrens * Finally, create nvroot and add all top-level vdevs to it. 1335 789 ahrens */ 1336 789 ahrens verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); 1337 789 ahrens verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 1338 789 ahrens VDEV_TYPE_ROOT) == 0); 1339 789 ahrens verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1340 789 ahrens top, toplevels) == 0); 1341 2082 eschrock if (nspares != 0) 1342 2082 eschrock verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 1343 2082 eschrock spares, nspares) == 0); 1344 5450 brendan if (nl2cache != 0) 1345 5450 brendan verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 1346 5450 brendan l2cache, nl2cache) == 0); 1347 789 ahrens 1348 789 ahrens for (t = 0; t < toplevels; t++) 1349 789 ahrens nvlist_free(top[t]); 1350 2082 eschrock for (t = 0; t < nspares; t++) 1351 2082 eschrock nvlist_free(spares[t]); 1352 5450 brendan for (t = 0; t < nl2cache; t++) 1353 5450 brendan nvlist_free(l2cache[t]); 1354 2082 eschrock if (spares) 1355 2082 eschrock free(spares); 1356 5450 brendan if (l2cache) 1357 5450 brendan free(l2cache); 1358 789 ahrens free(top); 1359 789 ahrens 1360 789 ahrens return (nvroot); 1361 789 ahrens } 1362 789 ahrens 1363 4276 taylor 1364 789 ahrens /* 1365 789 ahrens * Get and validate the contents of the given vdev specification. This ensures 1366 789 ahrens * that the nvlist returned is well-formed, that all the devices exist, and that 1367 789 ahrens * they are not currently in use by any other known consumer. The 'poolconfig' 1368 789 ahrens * parameter is the current configuration of the pool when adding devices 1369 789 ahrens * existing pool, and is used to perform additional checks, such as changing the 1370 789 ahrens * replication level of the pool. It can be 'NULL' to indicate that this is a 1371 789 ahrens * new pool. The 'force' flag controls whether devices should be forcefully 1372 789 ahrens * added, even if they appear in use. 1373 789 ahrens */ 1374 789 ahrens nvlist_t * 1375 4276 taylor make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, 1376 7343 Eric boolean_t isreplacing, boolean_t dryrun, int argc, char **argv) 1377 789 ahrens { 1378 789 ahrens nvlist_t *newroot; 1379 4276 taylor nvlist_t *poolconfig = NULL; 1380 789 ahrens is_force = force; 1381 789 ahrens 1382 789 ahrens /* 1383 789 ahrens * Construct the vdev specification. If this is successful, we know 1384 789 ahrens * that we have a valid specification, and that all devices can be 1385 789 ahrens * opened. 1386 789 ahrens */ 1387 789 ahrens if ((newroot = construct_spec(argc, argv)) == NULL) 1388 4276 taylor return (NULL); 1389 4276 taylor 1390 4276 taylor if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) 1391 789 ahrens return (NULL); 1392 789 ahrens 1393 789 ahrens /* 1394 789 ahrens * Validate each device to make sure that its not shared with another 1395 789 ahrens * subsystem. We do this even if 'force' is set, because there are some 1396 789 ahrens * uses (such as a dedicated dump device) that even '-f' cannot 1397 789 ahrens * override. 1398 789 ahrens */ 1399 2082 eschrock if (check_in_use(poolconfig, newroot, force, isreplacing, 1400 2082 eschrock B_FALSE) != 0) { 1401 789 ahrens nvlist_free(newroot); 1402 789 ahrens return (NULL); 1403 789 ahrens } 1404 789 ahrens 1405 789 ahrens /* 1406 789 ahrens * Check the replication level of the given vdevs and report any errors 1407 789 ahrens * found. We include the existing pool spec, if any, as we need to 1408 789 ahrens * catch changes against the existing replication level. 1409 789 ahrens */ 1410 789 ahrens if (check_rep && check_replication(poolconfig, newroot) != 0) { 1411 789 ahrens nvlist_free(newroot); 1412 789 ahrens return (NULL); 1413 789 ahrens } 1414 789 ahrens 1415 789 ahrens /* 1416 789 ahrens * Run through the vdev specification and label any whole disks found. 1417 789 ahrens */ 1418 7343 Eric if (!dryrun && make_disks(zhp, newroot) != 0) { 1419 789 ahrens nvlist_free(newroot); 1420 789 ahrens return (NULL); 1421 789 ahrens } 1422 789 ahrens 1423 789 ahrens return (newroot); 1424 789 ahrens } 1425