Home | History | Annotate | Download | only in common
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22  10594    George  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens /*
     27    789    ahrens  * Pool import support functions.
     28    789    ahrens  *
     29    789    ahrens  * To import a pool, we rely on reading the configuration information from the
     30    789    ahrens  * ZFS label of each device.  If we successfully read the label, then we
     31    789    ahrens  * organize the configuration information in the following hierarchy:
     32    789    ahrens  *
     33    789    ahrens  * 	pool guid -> toplevel vdev guid -> label txg
     34    789    ahrens  *
     35    789    ahrens  * Duplicate entries matching this same tuple will be discarded.  Once we have
     36    789    ahrens  * examined every device, we pick the best label txg config for each toplevel
     37    789    ahrens  * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
     38    789    ahrens  * update any paths that have changed.  Finally, we attempt to import the pool
     39    789    ahrens  * using our derived config, and record the results.
     40    789    ahrens  */
     41    789    ahrens 
     42  10980      Eric #include <ctype.h>
     43    789    ahrens #include <devid.h>
     44    789    ahrens #include <dirent.h>
     45    789    ahrens #include <errno.h>
     46    789    ahrens #include <libintl.h>
     47  10980      Eric #include <stddef.h>
     48    789    ahrens #include <stdlib.h>
     49    789    ahrens #include <string.h>
     50    789    ahrens #include <sys/stat.h>
     51    789    ahrens #include <unistd.h>
     52    789    ahrens #include <fcntl.h>
     53  10980      Eric #include <sys/vtoc.h>
     54  10980      Eric #include <sys/dktp/fdisk.h>
     55  10980      Eric #include <sys/efi_partition.h>
     56  10980      Eric #include <thread_pool.h>
     57    789    ahrens 
     58    789    ahrens #include <sys/vdev_impl.h>
     59    789    ahrens 
     60    789    ahrens #include "libzfs.h"
     61    789    ahrens #include "libzfs_impl.h"
     62    789    ahrens 
     63    789    ahrens /*
     64    789    ahrens  * Intermediate structures used to gather configuration information.
     65    789    ahrens  */
     66    789    ahrens typedef struct config_entry {
     67    789    ahrens 	uint64_t		ce_txg;
     68    789    ahrens 	nvlist_t		*ce_config;
     69    789    ahrens 	struct config_entry	*ce_next;
     70    789    ahrens } config_entry_t;
     71    789    ahrens 
     72    789    ahrens typedef struct vdev_entry {
     73    789    ahrens 	uint64_t		ve_guid;
     74    789    ahrens 	config_entry_t		*ve_configs;
     75    789    ahrens 	struct vdev_entry	*ve_next;
     76    789    ahrens } vdev_entry_t;
     77    789    ahrens 
     78    789    ahrens typedef struct pool_entry {
     79    789    ahrens 	uint64_t		pe_guid;
     80    789    ahrens 	vdev_entry_t		*pe_vdevs;
     81    789    ahrens 	struct pool_entry	*pe_next;
     82    789    ahrens } pool_entry_t;
     83    789    ahrens 
     84    789    ahrens typedef struct name_entry {
     85   2082  eschrock 	char			*ne_name;
     86    789    ahrens 	uint64_t		ne_guid;
     87    789    ahrens 	struct name_entry	*ne_next;
     88    789    ahrens } name_entry_t;
     89    789    ahrens 
     90    789    ahrens typedef struct pool_list {
     91    789    ahrens 	pool_entry_t		*pools;
     92    789    ahrens 	name_entry_t		*names;
     93    789    ahrens } pool_list_t;
     94    789    ahrens 
     95    789    ahrens static char *
     96    789    ahrens get_devid(const char *path)
     97    789    ahrens {
     98    789    ahrens 	int fd;
     99    789    ahrens 	ddi_devid_t devid;
    100    789    ahrens 	char *minor, *ret;
    101    789    ahrens 
    102    789    ahrens 	if ((fd = open(path, O_RDONLY)) < 0)
    103    789    ahrens 		return (NULL);
    104    789    ahrens 
    105    789    ahrens 	minor = NULL;
    106    789    ahrens 	ret = NULL;
    107    789    ahrens 	if (devid_get(fd, &devid) == 0) {
    108    789    ahrens 		if (devid_get_minor_name(fd, &minor) == 0)
    109    789    ahrens 			ret = devid_str_encode(devid, minor);
    110    789    ahrens 		if (minor != NULL)
    111    789    ahrens 			devid_str_free(minor);
    112    789    ahrens 		devid_free(devid);
    113    789    ahrens 	}
    114   1354  eschrock 	(void) close(fd);
    115    789    ahrens 
    116    789    ahrens 	return (ret);
    117    789    ahrens }
    118    789    ahrens 
    119    789    ahrens 
    120    789    ahrens /*
    121    789    ahrens  * Go through and fix up any path and/or devid information for the given vdev
    122    789    ahrens  * configuration.
    123    789    ahrens  */
    124   2082  eschrock static int
    125    789    ahrens fix_paths(nvlist_t *nv, name_entry_t *names)
    126    789    ahrens {
    127    789    ahrens 	nvlist_t **child;
    128    789    ahrens 	uint_t c, children;
    129    789    ahrens 	uint64_t guid;
    130   1354  eschrock 	name_entry_t *ne, *best;
    131   1354  eschrock 	char *path, *devid;
    132   1354  eschrock 	int matched;
    133    789    ahrens 
    134    789    ahrens 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
    135    789    ahrens 	    &child, &children) == 0) {
    136    789    ahrens 		for (c = 0; c < children; c++)
    137   2082  eschrock 			if (fix_paths(child[c], names) != 0)
    138   2082  eschrock 				return (-1);
    139   2082  eschrock 		return (0);
    140    789    ahrens 	}
    141    789    ahrens 
    142    789    ahrens 	/*
    143    789    ahrens 	 * This is a leaf (file or disk) vdev.  In either case, go through
    144    789    ahrens 	 * the name list and see if we find a matching guid.  If so, replace
    145    789    ahrens 	 * the path and see if we can calculate a new devid.
    146   1354  eschrock 	 *
    147   1354  eschrock 	 * There may be multiple names associated with a particular guid, in
    148   1354  eschrock 	 * which case we have overlapping slices or multiple paths to the same
    149   1354  eschrock 	 * disk.  If this is the case, then we want to pick the path that is
    150   1354  eschrock 	 * the most similar to the original, where "most similar" is the number
    151   1354  eschrock 	 * of matching characters starting from the end of the path.  This will
    152   1354  eschrock 	 * preserve slice numbers even if the disks have been reorganized, and
    153   1354  eschrock 	 * will also catch preferred disk names if multiple paths exist.
    154    789    ahrens 	 */
    155    789    ahrens 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
    156   1354  eschrock 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
    157   1354  eschrock 		path = NULL;
    158    789    ahrens 
    159   1354  eschrock 	matched = 0;
    160   1354  eschrock 	best = NULL;
    161   1354  eschrock 	for (ne = names; ne != NULL; ne = ne->ne_next) {
    162   1354  eschrock 		if (ne->ne_guid == guid) {
    163   1354  eschrock 			const char *src, *dst;
    164   1354  eschrock 			int count;
    165    789    ahrens 
    166   1354  eschrock 			if (path == NULL) {
    167   1354  eschrock 				best = ne;
    168   1354  eschrock 				break;
    169   1354  eschrock 			}
    170   1354  eschrock 
    171   1354  eschrock 			src = ne->ne_name + strlen(ne->ne_name) - 1;
    172   1354  eschrock 			dst = path + strlen(path) - 1;
    173   1354  eschrock 			for (count = 0; src >= ne->ne_name && dst >= path;
    174   1354  eschrock 			    src--, dst--, count++)
    175   1354  eschrock 				if (*src != *dst)
    176   1354  eschrock 					break;
    177   1354  eschrock 
    178   1354  eschrock 			/*
    179   1354  eschrock 			 * At this point, 'count' is the number of characters
    180   1354  eschrock 			 * matched from the end.
    181   1354  eschrock 			 */
    182   1354  eschrock 			if (count > matched || best == NULL) {
    183   1354  eschrock 				best = ne;
    184   1354  eschrock 				matched = count;
    185   1354  eschrock 			}
    186   1354  eschrock 		}
    187   1354  eschrock 	}
    188   1354  eschrock 
    189   1354  eschrock 	if (best == NULL)
    190   2082  eschrock 		return (0);
    191    789    ahrens 
    192   2082  eschrock 	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
    193   2082  eschrock 		return (-1);
    194    789    ahrens 
    195   1354  eschrock 	if ((devid = get_devid(best->ne_name)) == NULL) {
    196    789    ahrens 		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
    197    789    ahrens 	} else {
    198   2082  eschrock 		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
    199   2082  eschrock 			return (-1);
    200    789    ahrens 		devid_str_free(devid);
    201    789    ahrens 	}
    202   2082  eschrock 
    203   2082  eschrock 	return (0);
    204    789    ahrens }
    205    789    ahrens 
    206    789    ahrens /*
    207    789    ahrens  * Add the given configuration to the list of known devices.
    208    789    ahrens  */
    209   2082  eschrock static int
    210   2082  eschrock add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
    211   2082  eschrock     nvlist_t *config)
    212    789    ahrens {
    213   2082  eschrock 	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
    214    789    ahrens 	pool_entry_t *pe;
    215    789    ahrens 	vdev_entry_t *ve;
    216    789    ahrens 	config_entry_t *ce;
    217    789    ahrens 	name_entry_t *ne;
    218   2082  eschrock 
    219   2082  eschrock 	/*
    220   5450   brendan 	 * If this is a hot spare not currently in use or level 2 cache
    221   5450   brendan 	 * device, add it to the list of names to translate, but don't do
    222   5450   brendan 	 * anything else.
    223   2082  eschrock 	 */
    224   2082  eschrock 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
    225   5450   brendan 	    &state) == 0 &&
    226   5450   brendan 	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
    227   2082  eschrock 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
    228   2082  eschrock 		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
    229   4055  eschrock 			return (-1);
    230   2082  eschrock 
    231   2082  eschrock 		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
    232   2082  eschrock 			free(ne);
    233   2082  eschrock 			return (-1);
    234   2082  eschrock 		}
    235   2082  eschrock 		ne->ne_guid = vdev_guid;
    236   2082  eschrock 		ne->ne_next = pl->names;
    237   2082  eschrock 		pl->names = ne;
    238   2082  eschrock 		return (0);
    239   2082  eschrock 	}
    240    789    ahrens 
    241    789    ahrens 	/*
    242    789    ahrens 	 * If we have a valid config but cannot read any of these fields, then
    243    789    ahrens 	 * it means we have a half-initialized label.  In vdev_label_init()
    244    789    ahrens 	 * we write a label with txg == 0 so that we can identify the device
    245    789    ahrens 	 * in case the user refers to the same disk later on.  If we fail to
    246    789    ahrens 	 * create the pool, we'll be left with a label in this state
    247    789    ahrens 	 * which should not be considered part of a valid pool.
    248    789    ahrens 	 */
    249    789    ahrens 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
    250    789    ahrens 	    &pool_guid) != 0 ||
    251    789    ahrens 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
    252    789    ahrens 	    &vdev_guid) != 0 ||
    253    789    ahrens 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
    254    789    ahrens 	    &top_guid) != 0 ||
    255    789    ahrens 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
    256    789    ahrens 	    &txg) != 0 || txg == 0) {
    257    789    ahrens 		nvlist_free(config);
    258   2082  eschrock 		return (0);
    259    789    ahrens 	}
    260    789    ahrens 
    261    789    ahrens 	/*
    262    789    ahrens 	 * First, see if we know about this pool.  If not, then add it to the
    263    789    ahrens 	 * list of known pools.
    264    789    ahrens 	 */
    265    789    ahrens 	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
    266    789    ahrens 		if (pe->pe_guid == pool_guid)
    267    789    ahrens 			break;
    268    789    ahrens 	}
    269    789    ahrens 
    270    789    ahrens 	if (pe == NULL) {
    271   2082  eschrock 		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
    272   2082  eschrock 			nvlist_free(config);
    273   2082  eschrock 			return (-1);
    274   2082  eschrock 		}
    275    789    ahrens 		pe->pe_guid = pool_guid;
    276    789    ahrens 		pe->pe_next = pl->pools;
    277    789    ahrens 		pl->pools = pe;
    278    789    ahrens 	}
    279    789    ahrens 
    280    789    ahrens 	/*
    281    789    ahrens 	 * Second, see if we know about this toplevel vdev.  Add it if its
    282    789    ahrens 	 * missing.
    283    789    ahrens 	 */
    284    789    ahrens 	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
    285    789    ahrens 		if (ve->ve_guid == top_guid)
    286    789    ahrens 			break;
    287    789    ahrens 	}
    288    789    ahrens 
    289    789    ahrens 	if (ve == NULL) {
    290   2082  eschrock 		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
    291   2082  eschrock 			nvlist_free(config);
    292   2082  eschrock 			return (-1);
    293   2082  eschrock 		}
    294    789    ahrens 		ve->ve_guid = top_guid;
    295    789    ahrens 		ve->ve_next = pe->pe_vdevs;
    296    789    ahrens 		pe->pe_vdevs = ve;
    297    789    ahrens 	}
    298    789    ahrens 
    299    789    ahrens 	/*
    300    789    ahrens 	 * Third, see if we have a config with a matching transaction group.  If
    301    789    ahrens 	 * so, then we do nothing.  Otherwise, add it to the list of known
    302    789    ahrens 	 * configs.
    303    789    ahrens 	 */
    304    789    ahrens 	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
    305    789    ahrens 		if (ce->ce_txg == txg)
    306    789    ahrens 			break;
    307    789    ahrens 	}
    308    789    ahrens 
    309    789    ahrens 	if (ce == NULL) {
    310   2082  eschrock 		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
    311   2082  eschrock 			nvlist_free(config);
    312   2082  eschrock 			return (-1);
    313   2082  eschrock 		}
    314    789    ahrens 		ce->ce_txg = txg;
    315    789    ahrens 		ce->ce_config = config;
    316    789    ahrens 		ce->ce_next = ve->ve_configs;
    317    789    ahrens 		ve->ve_configs = ce;
    318    789    ahrens 	} else {
    319    789    ahrens 		nvlist_free(config);
    320    789    ahrens 	}
    321    789    ahrens 
    322    789    ahrens 	/*
    323    789    ahrens 	 * At this point we've successfully added our config to the list of
    324    789    ahrens 	 * known configs.  The last thing to do is add the vdev guid -> path
    325    789    ahrens 	 * mappings so that we can fix up the configuration as necessary before
    326    789    ahrens 	 * doing the import.
    327    789    ahrens 	 */
    328   2082  eschrock 	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
    329   2082  eschrock 		return (-1);
    330    789    ahrens 
    331   2082  eschrock 	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
    332   2082  eschrock 		free(ne);
    333   2082  eschrock 		return (-1);
    334   2082  eschrock 	}
    335   2082  eschrock 
    336    789    ahrens 	ne->ne_guid = vdev_guid;
    337    789    ahrens 	ne->ne_next = pl->names;
    338    789    ahrens 	pl->names = ne;
    339   2082  eschrock 
    340   2082  eschrock 	return (0);
    341   1760  eschrock }
    342   1760  eschrock 
    343   1760  eschrock /*
    344   1760  eschrock  * Returns true if the named pool matches the given GUID.
    345   1760  eschrock  */
    346   2142  eschrock static int
    347   2142  eschrock pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    348   2142  eschrock     boolean_t *isactive)
    349   1760  eschrock {
    350   1760  eschrock 	zpool_handle_t *zhp;
    351   1760  eschrock 	uint64_t theguid;
    352   1760  eschrock 
    353   2142  eschrock 	if (zpool_open_silent(hdl, name, &zhp) != 0)
    354   2142  eschrock 		return (-1);
    355   2142  eschrock 
    356   2142  eschrock 	if (zhp == NULL) {
    357   2142  eschrock 		*isactive = B_FALSE;
    358   2142  eschrock 		return (0);
    359   2142  eschrock 	}
    360   1760  eschrock 
    361   1760  eschrock 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
    362   1760  eschrock 	    &theguid) == 0);
    363   1760  eschrock 
    364   1760  eschrock 	zpool_close(zhp);
    365   1760  eschrock 
    366   2142  eschrock 	*isactive = (theguid == guid);
    367   2142  eschrock 	return (0);
    368    789    ahrens }
    369    789    ahrens 
    370   5363  eschrock static nvlist_t *
    371   5363  eschrock refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
    372   5363  eschrock {
    373   5363  eschrock 	nvlist_t *nvl;
    374   5363  eschrock 	zfs_cmd_t zc = { 0 };
    375   5363  eschrock 	int err;
    376   5363  eschrock 
    377   5363  eschrock 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
    378   5363  eschrock 		return (NULL);
    379   5363  eschrock 
    380   5363  eschrock 	if (zcmd_alloc_dst_nvlist(hdl, &zc,
    381   5363  eschrock 	    zc.zc_nvlist_conf_size * 2) != 0) {
    382   5363  eschrock 		zcmd_free_nvlists(&zc);
    383   5363  eschrock 		return (NULL);
    384   5363  eschrock 	}
    385   5363  eschrock 
    386   5363  eschrock 	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
    387   5363  eschrock 	    &zc)) != 0 && errno == ENOMEM) {
    388   5363  eschrock 		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
    389   5363  eschrock 			zcmd_free_nvlists(&zc);
    390   5363  eschrock 			return (NULL);
    391   5363  eschrock 		}
    392   5363  eschrock 	}
    393   5363  eschrock 
    394   5363  eschrock 	if (err) {
    395   5363  eschrock 		zcmd_free_nvlists(&zc);
    396   5363  eschrock 		return (NULL);
    397   5363  eschrock 	}
    398   5363  eschrock 
    399   5363  eschrock 	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
    400   5363  eschrock 		zcmd_free_nvlists(&zc);
    401   5363  eschrock 		return (NULL);
    402   5363  eschrock 	}
    403   5363  eschrock 
    404   5363  eschrock 	zcmd_free_nvlists(&zc);
    405   5363  eschrock 	return (nvl);
    406  10594    George }
    407  10594    George 
    408  10594    George /*
    409  10594    George  * Determine if the vdev id is a hole in the namespace.
    410  10594    George  */
    411  10594    George boolean_t
    412  10594    George vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
    413  10594    George {
    414  10594    George 	for (int c = 0; c < holes; c++) {
    415  10594    George 
    416  10594    George 		/* Top-level is a hole */
    417  10594    George 		if (hole_array[c] == id)
    418  10594    George 			return (B_TRUE);
    419  10594    George 	}
    420  10594    George 	return (B_FALSE);
    421   5363  eschrock }
    422   5363  eschrock 
    423    789    ahrens /*
    424    789    ahrens  * Convert our list of pools into the definitive set of configurations.  We
    425    789    ahrens  * start by picking the best config for each toplevel vdev.  Once that's done,
    426    789    ahrens  * we assemble the toplevel vdevs into a full config for the pool.  We make a
    427    789    ahrens  * pass to fix up any incorrect paths, and then add it to the main list to
    428    789    ahrens  * return to the user.
    429    789    ahrens  */
    430    789    ahrens static nvlist_t *
    431   5994  ck153898 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
    432    789    ahrens {
    433   2082  eschrock 	pool_entry_t *pe;
    434   2082  eschrock 	vdev_entry_t *ve;
    435   2082  eschrock 	config_entry_t *ce;
    436   2082  eschrock 	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
    437   5450   brendan 	nvlist_t **spares, **l2cache;
    438   5450   brendan 	uint_t i, nspares, nl2cache;
    439   2082  eschrock 	boolean_t config_seen;
    440    789    ahrens 	uint64_t best_txg;
    441   3975  ek110237 	char *name, *hostname;
    442   2082  eschrock 	uint64_t version, guid;
    443   2082  eschrock 	uint_t children = 0;
    444   2082  eschrock 	nvlist_t **child = NULL;
    445  10594    George 	uint_t holes;
    446  10594    George 	uint64_t *hole_array, max_id;
    447   2082  eschrock 	uint_t c;
    448   2142  eschrock 	boolean_t isactive;
    449   3975  ek110237 	uint64_t hostid;
    450   5363  eschrock 	nvlist_t *nvl;
    451   6807  ck153898 	boolean_t found_one = B_FALSE;
    452  10594    George 	boolean_t valid_top_config = B_FALSE;
    453    789    ahrens 
    454   2082  eschrock 	if (nvlist_alloc(&ret, 0, 0) != 0)
    455   2082  eschrock 		goto nomem;
    456    789    ahrens 
    457   2082  eschrock 	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
    458  10594    George 		uint64_t id, max_txg = 0;
    459    789    ahrens 
    460   2082  eschrock 		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
    461   2082  eschrock 			goto nomem;
    462   2082  eschrock 		config_seen = B_FALSE;
    463    789    ahrens 
    464    789    ahrens 		/*
    465    789    ahrens 		 * Iterate over all toplevel vdevs.  Grab the pool configuration
    466    789    ahrens 		 * from the first one we find, and then go through the rest and
    467    789    ahrens 		 * add them as necessary to the 'vdevs' member of the config.
    468    789    ahrens 		 */
    469   2082  eschrock 		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
    470    789    ahrens 
    471    789    ahrens 			/*
    472    789    ahrens 			 * Determine the best configuration for this vdev by
    473    789    ahrens 			 * selecting the config with the latest transaction
    474    789    ahrens 			 * group.
    475    789    ahrens 			 */
    476    789    ahrens 			best_txg = 0;
    477    789    ahrens 			for (ce = ve->ve_configs; ce != NULL;
    478    789    ahrens 			    ce = ce->ce_next) {
    479    789    ahrens 
    480   2082  eschrock 				if (ce->ce_txg > best_txg) {
    481    789    ahrens 					tmp = ce->ce_config;
    482   2082  eschrock 					best_txg = ce->ce_txg;
    483  10594    George 				}
    484  10594    George 			}
    485  10594    George 
    486  10594    George 			/*
    487  10594    George 			 * We rely on the fact that the max txg for the
    488  10594    George 			 * pool will contain the most up-to-date information
    489  10594    George 			 * about the valid top-levels in the vdev namespace.
    490  10594    George 			 */
    491  10594    George 			if (best_txg > max_txg) {
    492  10594    George 				(void) nvlist_remove(config,
    493  10594    George 				    ZPOOL_CONFIG_VDEV_CHILDREN,
    494  10594    George 				    DATA_TYPE_UINT64);
    495  10594    George 				(void) nvlist_remove(config,
    496  10594    George 				    ZPOOL_CONFIG_HOLE_ARRAY,
    497  10594    George 				    DATA_TYPE_UINT64_ARRAY);
    498  10594    George 
    499  10594    George 				max_txg = best_txg;
    500  10594    George 				hole_array = NULL;
    501  10594    George 				holes = 0;
    502  10594    George 				max_id = 0;
    503  10594    George 				valid_top_config = B_FALSE;
    504  10594    George 
    505  10594    George 				if (nvlist_lookup_uint64(tmp,
    506  10594    George 				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
    507  10594    George 					verify(nvlist_add_uint64(config,
    508  10594    George 					    ZPOOL_CONFIG_VDEV_CHILDREN,
    509  10594    George 					    max_id) == 0);
    510  10594    George 					valid_top_config = B_TRUE;
    511  10594    George 				}
    512  10594    George 
    513  10594    George 				if (nvlist_lookup_uint64_array(tmp,
    514  10594    George 				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
    515  10594    George 				    &holes) == 0) {
    516  10594    George 					verify(nvlist_add_uint64_array(config,
    517  10594    George 					    ZPOOL_CONFIG_HOLE_ARRAY,
    518  10594    George 					    hole_array, holes) == 0);
    519   2082  eschrock 				}
    520    789    ahrens 			}
    521    789    ahrens 
    522    789    ahrens 			if (!config_seen) {
    523    789    ahrens 				/*
    524    789    ahrens 				 * Copy the relevant pieces of data to the pool
    525    789    ahrens 				 * configuration:
    526    789    ahrens 				 *
    527   2082  eschrock 				 *	version
    528    789    ahrens 				 * 	pool guid
    529    789    ahrens 				 * 	name
    530    789    ahrens 				 * 	pool state
    531   3975  ek110237 				 *	hostid (if available)
    532   3975  ek110237 				 *	hostname (if available)
    533    789    ahrens 				 */
    534    789    ahrens 				uint64_t state;
    535    789    ahrens 
    536    789    ahrens 				verify(nvlist_lookup_uint64(tmp,
    537   2082  eschrock 				    ZPOOL_CONFIG_VERSION, &version) == 0);
    538   2082  eschrock 				if (nvlist_add_uint64(config,
    539   2082  eschrock 				    ZPOOL_CONFIG_VERSION, version) != 0)
    540   2082  eschrock 					goto nomem;
    541   2082  eschrock 				verify(nvlist_lookup_uint64(tmp,
    542    789    ahrens 				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
    543   2082  eschrock 				if (nvlist_add_uint64(config,
    544   2082  eschrock 				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
    545   2082  eschrock 					goto nomem;
    546    789    ahrens 				verify(nvlist_lookup_string(tmp,
    547    789    ahrens 				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
    548   2082  eschrock 				if (nvlist_add_string(config,
    549   2082  eschrock 				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
    550   2082  eschrock 					goto nomem;
    551    789    ahrens 				verify(nvlist_lookup_uint64(tmp,
    552    789    ahrens 				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
    553   2082  eschrock 				if (nvlist_add_uint64(config,
    554   2082  eschrock 				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
    555   2082  eschrock 					goto nomem;
    556   3975  ek110237 				hostid = 0;
    557   3975  ek110237 				if (nvlist_lookup_uint64(tmp,
    558   3975  ek110237 				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
    559   3975  ek110237 					if (nvlist_add_uint64(config,
    560   3975  ek110237 					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
    561   3975  ek110237 						goto nomem;
    562   3975  ek110237 					verify(nvlist_lookup_string(tmp,
    563   3975  ek110237 					    ZPOOL_CONFIG_HOSTNAME,
    564   3975  ek110237 					    &hostname) == 0);
    565   3975  ek110237 					if (nvlist_add_string(config,
    566   3975  ek110237 					    ZPOOL_CONFIG_HOSTNAME,
    567   3975  ek110237 					    hostname) != 0)
    568   3975  ek110237 						goto nomem;
    569   3975  ek110237 				}
    570    789    ahrens 
    571   2082  eschrock 				config_seen = B_TRUE;
    572    789    ahrens 			}
    573    789    ahrens 
    574    789    ahrens 			/*
    575    789    ahrens 			 * Add this top-level vdev to the child array.
    576    789    ahrens 			 */
    577    789    ahrens 			verify(nvlist_lookup_nvlist(tmp,
    578    789    ahrens 			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
    579    789    ahrens 			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
    580    789    ahrens 			    &id) == 0);
    581  10594    George 
    582    789    ahrens 			if (id >= children) {
    583    789    ahrens 				nvlist_t **newchild;
    584    789    ahrens 
    585   2082  eschrock 				newchild = zfs_alloc(hdl, (id + 1) *
    586    789    ahrens 				    sizeof (nvlist_t *));
    587   2082  eschrock 				if (newchild == NULL)
    588   2082  eschrock 					goto nomem;
    589    789    ahrens 
    590    789    ahrens 				for (c = 0; c < children; c++)
    591    789    ahrens 					newchild[c] = child[c];
    592    789    ahrens 
    593    789    ahrens 				free(child);
    594    789    ahrens 				child = newchild;
    595    789    ahrens 				children = id + 1;
    596    789    ahrens 			}
    597   2082  eschrock 			if (nvlist_dup(nvtop, &child[id], 0) != 0)
    598   2082  eschrock 				goto nomem;
    599    789    ahrens 
    600    789    ahrens 		}
    601    789    ahrens 
    602  10594    George 		/*
    603  10594    George 		 * If we have information about all the top-levels then
    604  10594    George 		 * clean up the nvlist which we've constructed. This
    605  10594    George 		 * means removing any extraneous devices that are
    606  10594    George 		 * beyond the valid range or adding devices to the end
    607  10594    George 		 * of our array which appear to be missing.
    608  10594    George 		 */
    609  10594    George 		if (valid_top_config) {
    610  10594    George 			if (max_id < children) {
    611  10594    George 				for (c = max_id; c < children; c++)
    612  10594    George 					nvlist_free(child[c]);
    613  10594    George 				children = max_id;
    614  10594    George 			} else if (max_id > children) {
    615  10594    George 				nvlist_t **newchild;
    616  10594    George 
    617  10594    George 				newchild = zfs_alloc(hdl, (max_id) *
    618  10594    George 				    sizeof (nvlist_t *));
    619  10594    George 				if (newchild == NULL)
    620  10594    George 					goto nomem;
    621  10594    George 
    622  10594    George 				for (c = 0; c < children; c++)
    623  10594    George 					newchild[c] = child[c];
    624  10594    George 
    625  10594    George 				free(child);
    626  10594    George 				child = newchild;
    627  10594    George 				children = max_id;
    628  10594    George 			}
    629  10594    George 		}
    630  10594    George 
    631    789    ahrens 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
    632    789    ahrens 		    &guid) == 0);
    633  10594    George 
    634  10594    George 		/*
    635  10594    George 		 * The vdev namespace may contain holes as a result of
    636  10594    George 		 * device removal. We must add them back into the vdev
    637  10594    George 		 * tree before we process any missing devices.
    638  10594    George 		 */
    639  10594    George 		if (holes > 0) {
    640  10594    George 			ASSERT(valid_top_config);
    641  10594    George 
    642  10594    George 			for (c = 0; c < children; c++) {
    643  10594    George 				nvlist_t *holey;
    644  10594    George 
    645  10594    George 				if (child[c] != NULL ||
    646  10594    George 				    !vdev_is_hole(hole_array, holes, c))
    647  10594    George 					continue;
    648  10594    George 
    649  10594    George 				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
    650  10594    George 				    0) != 0)
    651  10594    George 					goto nomem;
    652  10594    George 
    653  10594    George 				/*
    654  10594    George 				 * Holes in the namespace are treated as
    655  10594    George 				 * "hole" top-level vdevs and have a
    656  10594    George 				 * special flag set on them.
    657  10594    George 				 */
    658  10594    George 				if (nvlist_add_string(holey,
    659  10594    George 				    ZPOOL_CONFIG_TYPE,
    660  10594    George 				    VDEV_TYPE_HOLE) != 0 ||
    661  10594    George 				    nvlist_add_uint64(holey,
    662  10594    George 				    ZPOOL_CONFIG_ID, c) != 0 ||
    663  10594    George 				    nvlist_add_uint64(holey,
    664  10594    George 				    ZPOOL_CONFIG_GUID, 0ULL) != 0)
    665  10594    George 					goto nomem;
    666  10594    George 				child[c] = holey;
    667  10594    George 			}
    668  10594    George 		}
    669    789    ahrens 
    670    789    ahrens 		/*
    671    789    ahrens 		 * Look for any missing top-level vdevs.  If this is the case,
    672    789    ahrens 		 * create a faked up 'missing' vdev as a placeholder.  We cannot
    673    789    ahrens 		 * simply compress the child array, because the kernel performs
    674    789    ahrens 		 * certain checks to make sure the vdev IDs match their location
    675    789    ahrens 		 * in the configuration.
    676    789    ahrens 		 */
    677  10594    George 		for (c = 0; c < children; c++) {
    678    789    ahrens 			if (child[c] == NULL) {
    679    789    ahrens 				nvlist_t *missing;
    680   2082  eschrock 				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
    681   2082  eschrock 				    0) != 0)
    682   2082  eschrock 					goto nomem;
    683   2082  eschrock 				if (nvlist_add_string(missing,
    684   2082  eschrock 				    ZPOOL_CONFIG_TYPE,
    685   2082  eschrock 				    VDEV_TYPE_MISSING) != 0 ||
    686   2082  eschrock 				    nvlist_add_uint64(missing,
    687   2082  eschrock 				    ZPOOL_CONFIG_ID, c) != 0 ||
    688   2082  eschrock 				    nvlist_add_uint64(missing,
    689   2082  eschrock 				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
    690   2082  eschrock 					nvlist_free(missing);
    691   2082  eschrock 					goto nomem;
    692   2082  eschrock 				}
    693    789    ahrens 				child[c] = missing;
    694    789    ahrens 			}
    695  10594    George 		}
    696    789    ahrens 
    697    789    ahrens 		/*
    698    789    ahrens 		 * Put all of this pool's top-level vdevs into a root vdev.
    699    789    ahrens 		 */
    700   2082  eschrock 		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
    701   2082  eschrock 			goto nomem;
    702   2082  eschrock 		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
    703   2082  eschrock 		    VDEV_TYPE_ROOT) != 0 ||
    704   2082  eschrock 		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
    705   2082  eschrock 		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
    706   2082  eschrock 		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
    707   2082  eschrock 		    child, children) != 0) {
    708   2082  eschrock 			nvlist_free(nvroot);
    709   2082  eschrock 			goto nomem;
    710   2082  eschrock 		}
    711    789    ahrens 
    712    789    ahrens 		for (c = 0; c < children; c++)
    713    789    ahrens 			nvlist_free(child[c]);
    714    789    ahrens 		free(child);
    715   2082  eschrock 		children = 0;
    716   2082  eschrock 		child = NULL;
    717    789    ahrens 
    718    789    ahrens 		/*
    719    789    ahrens 		 * Go through and fix up any paths and/or devids based on our
    720    789    ahrens 		 * known list of vdev GUID -> path mappings.
    721    789    ahrens 		 */
    722   2082  eschrock 		if (fix_paths(nvroot, pl->names) != 0) {
    723   2082  eschrock 			nvlist_free(nvroot);
    724   2082  eschrock 			goto nomem;
    725   2082  eschrock 		}
    726    789    ahrens 
    727    789    ahrens 		/*
    728    789    ahrens 		 * Add the root vdev to this pool's configuration.
    729    789    ahrens 		 */
    730   2082  eschrock 		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
    731   2082  eschrock 		    nvroot) != 0) {
    732   2082  eschrock 			nvlist_free(nvroot);
    733   2082  eschrock 			goto nomem;
    734   2082  eschrock 		}
    735    789    ahrens 		nvlist_free(nvroot);
    736    789    ahrens 
    737    789    ahrens 		/*
    738   5994  ck153898 		 * zdb uses this path to report on active pools that were
    739   5994  ck153898 		 * imported or created using -R.
    740   5994  ck153898 		 */
    741   5994  ck153898 		if (active_ok)
    742   5994  ck153898 			goto add_pool;
    743   5994  ck153898 
    744   5994  ck153898 		/*
    745    789    ahrens 		 * Determine if this pool is currently active, in which case we
    746    789    ahrens 		 * can't actually import it.
    747    789    ahrens 		 */
    748    789    ahrens 		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
    749    789    ahrens 		    &name) == 0);
    750    789    ahrens 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
    751    789    ahrens 		    &guid) == 0);
    752    789    ahrens 
    753   2142  eschrock 		if (pool_active(hdl, name, guid, &isactive) != 0)
    754   2142  eschrock 			goto error;
    755   2142  eschrock 
    756   2144  eschrock 		if (isactive) {
    757    789    ahrens 			nvlist_free(config);
    758   2082  eschrock 			config = NULL;
    759    789    ahrens 			continue;
    760    789    ahrens 		}
    761    789    ahrens 
    762  10594    George 		if ((nvl = refresh_config(hdl, config)) == NULL) {
    763  10594    George 			nvlist_free(config);
    764  10594    George 			config = NULL;
    765  10594    George 			continue;
    766  10594    George 		}
    767    789    ahrens 
    768    789    ahrens 		nvlist_free(config);
    769   5363  eschrock 		config = nvl;
    770    789    ahrens 
    771   2082  eschrock 		/*
    772   2082  eschrock 		 * Go through and update the paths for spares, now that we have
    773   2082  eschrock 		 * them.
    774   2082  eschrock 		 */
    775   2082  eschrock 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
    776   2082  eschrock 		    &nvroot) == 0);
    777   2082  eschrock 		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
    778   2082  eschrock 		    &spares, &nspares) == 0) {
    779   2082  eschrock 			for (i = 0; i < nspares; i++) {
    780   2082  eschrock 				if (fix_paths(spares[i], pl->names) != 0)
    781   2082  eschrock 					goto nomem;
    782   2082  eschrock 			}
    783   3975  ek110237 		}
    784   3975  ek110237 
    785   3975  ek110237 		/*
    786   5450   brendan 		 * Update the paths for l2cache devices.
    787   5450   brendan 		 */
    788   5450   brendan 		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
    789   5450   brendan 		    &l2cache, &nl2cache) == 0) {
    790   5450   brendan 			for (i = 0; i < nl2cache; i++) {
    791   5450   brendan 				if (fix_paths(l2cache[i], pl->names) != 0)
    792   5450   brendan 					goto nomem;
    793   5450   brendan 			}
    794   5450   brendan 		}
    795   5450   brendan 
    796   5450   brendan 		/*
    797   3975  ek110237 		 * Restore the original information read from the actual label.
    798   3975  ek110237 		 */
    799   3975  ek110237 		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
    800   3975  ek110237 		    DATA_TYPE_UINT64);
    801   3975  ek110237 		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
    802   3975  ek110237 		    DATA_TYPE_STRING);
    803   3975  ek110237 		if (hostid != 0) {
    804   3975  ek110237 			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
    805   3975  ek110237 			    hostid) == 0);
    806   3975  ek110237 			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
    807   3975  ek110237 			    hostname) == 0);
    808   2082  eschrock 		}
    809   2082  eschrock 
    810   5994  ck153898 add_pool:
    811    789    ahrens 		/*
    812    789    ahrens 		 * Add this pool to the list of configs.
    813    789    ahrens 		 */
    814   2676  eschrock 		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
    815   2676  eschrock 		    &name) == 0);
    816   2082  eschrock 		if (nvlist_add_nvlist(ret, name, config) != 0)
    817   2082  eschrock 			goto nomem;
    818    789    ahrens 
    819   6807  ck153898 		found_one = B_TRUE;
    820    789    ahrens 		nvlist_free(config);
    821   2082  eschrock 		config = NULL;
    822   6807  ck153898 	}
    823   6807  ck153898 
    824   6807  ck153898 	if (!found_one) {
    825   6807  ck153898 		nvlist_free(ret);
    826   6807  ck153898 		ret = NULL;
    827    789    ahrens 	}
    828    789    ahrens 
    829    789    ahrens 	return (ret);
    830   2082  eschrock 
    831   2082  eschrock nomem:
    832   2082  eschrock 	(void) no_memory(hdl);
    833   2082  eschrock error:
    834   2142  eschrock 	nvlist_free(config);
    835   2142  eschrock 	nvlist_free(ret);
    836   2082  eschrock 	for (c = 0; c < children; c++)
    837   2082  eschrock 		nvlist_free(child[c]);
    838   2142  eschrock 	free(child);
    839   2082  eschrock 
    840   2082  eschrock 	return (NULL);
    841    789    ahrens }
    842    789    ahrens 
    843    789    ahrens /*
    844    789    ahrens  * Return the offset of the given label.
    845    789    ahrens  */
    846    789    ahrens static uint64_t
    847   4577    ahrens label_offset(uint64_t size, int l)
    848    789    ahrens {
    849   4577    ahrens 	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
    850    789    ahrens 	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
    851    789    ahrens 	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
    852    789    ahrens }
    853    789    ahrens 
    854    789    ahrens /*
    855    789    ahrens  * Given a file descriptor, read the label information and return an nvlist
    856    789    ahrens  * describing the configuration, if there is one.
    857    789    ahrens  */
    858   2082  eschrock int
    859   2082  eschrock zpool_read_label(int fd, nvlist_t **config)
    860    789    ahrens {
    861    789    ahrens 	struct stat64 statbuf;
    862    789    ahrens 	int l;
    863    789    ahrens 	vdev_label_t *label;
    864   4577    ahrens 	uint64_t state, txg, size;
    865    789    ahrens 
    866   2082  eschrock 	*config = NULL;
    867   2082  eschrock 
    868    789    ahrens 	if (fstat64(fd, &statbuf) == -1)
    869   2082  eschrock 		return (0);
    870   4577    ahrens 	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
    871    789    ahrens 
    872   2082  eschrock 	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
    873   2082  eschrock 		return (-1);
    874    789    ahrens 
    875    789    ahrens 	for (l = 0; l < VDEV_LABELS; l++) {
    876   6643  eschrock 		if (pread64(fd, label, sizeof (vdev_label_t),
    877   4577    ahrens 		    label_offset(size, l)) != sizeof (vdev_label_t))
    878    789    ahrens 			continue;
    879    789    ahrens 
    880    789    ahrens 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
    881   2082  eschrock 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
    882    789    ahrens 			continue;
    883    789    ahrens 
    884   2082  eschrock 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
    885   5450   brendan 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
    886   2082  eschrock 			nvlist_free(*config);
    887    789    ahrens 			continue;
    888    789    ahrens 		}
    889    789    ahrens 
    890   5450   brendan 		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
    891   2082  eschrock 		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
    892   2082  eschrock 		    &txg) != 0 || txg == 0)) {
    893   2082  eschrock 			nvlist_free(*config);
    894    789    ahrens 			continue;
    895    789    ahrens 		}
    896    789    ahrens 
    897    789    ahrens 		free(label);
    898   2082  eschrock 		return (0);
    899    789    ahrens 	}
    900    789    ahrens 
    901    789    ahrens 	free(label);
    902   2082  eschrock 	*config = NULL;
    903   2082  eschrock 	return (0);
    904    789    ahrens }
    905    789    ahrens 
/*
 * One entry in the per-directory slice cache built by
 * zpool_find_import_impl().  Each entry describes a single directory entry
 * that may be probed for a pool label by zpool_open_func().
 */
typedef struct rdsk_node {
	char *rn_name;			/* directory entry name (heap copy) */
	int rn_dfd;			/* fd of the directory, for openat64() */
	libzfs_handle_t *rn_hdl;	/* handle used for error reporting */
	nvlist_t *rn_config;		/* label config found, or NULL */
	avl_tree_t *rn_avl;		/* slice cache this node lives in */
	avl_node_t rn_node;		/* AVL linkage */
	boolean_t rn_nozpool;		/* set when the node cannot hold a pool */
} rdsk_node_t;
    915  10980      Eric 
    916  10980      Eric static int
    917  10980      Eric slice_cache_compare(const void *arg1, const void *arg2)
    918  10980      Eric {
    919  10980      Eric 	const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
    920  10980      Eric 	const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
    921  10980      Eric 	char *nm1slice, *nm2slice;
    922  10980      Eric 	int rv;
    923  10980      Eric 
    924  10980      Eric 	/*
    925  10980      Eric 	 * slices zero and two are the most likely to provide results,
    926  10980      Eric 	 * so put those first
    927  10980      Eric 	 */
    928  10980      Eric 	nm1slice = strstr(nm1, "s0");
    929  10980      Eric 	nm2slice = strstr(nm2, "s0");
    930  10980      Eric 	if (nm1slice && !nm2slice) {
    931  10980      Eric 		return (-1);
    932  10980      Eric 	}
    933  10980      Eric 	if (!nm1slice && nm2slice) {
    934  10980      Eric 		return (1);
    935  10980      Eric 	}
    936  10980      Eric 	nm1slice = strstr(nm1, "s2");
    937  10980      Eric 	nm2slice = strstr(nm2, "s2");
    938  10980      Eric 	if (nm1slice && !nm2slice) {
    939  10980      Eric 		return (-1);
    940  10980      Eric 	}
    941  10980      Eric 	if (!nm1slice && nm2slice) {
    942  10980      Eric 		return (1);
    943  10980      Eric 	}
    944  10980      Eric 
    945  10980      Eric 	rv = strcmp(nm1, nm2);
    946  10980      Eric 	if (rv == 0)
    947  10980      Eric 		return (0);
    948  10980      Eric 	return (rv > 0 ? 1 : -1);
    949  10980      Eric }
    950  10980      Eric 
    951  10980      Eric static void
    952  10980      Eric check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    953  10980      Eric     diskaddr_t size, uint_t blksz)
    954  10980      Eric {
    955  10980      Eric 	rdsk_node_t tmpnode;
    956  10980      Eric 	rdsk_node_t *node;
    957  10980      Eric 	char sname[MAXNAMELEN];
    958  10980      Eric 
    959  10980      Eric 	tmpnode.rn_name = &sname[0];
    960  10980      Eric 	(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
    961  10980      Eric 	    diskname, partno);
    962  11123      Eric 	/*
    963  11123      Eric 	 * protect against division by zero for disk labels that
    964  11123      Eric 	 * contain a bogus sector size
    965  11123      Eric 	 */
    966  11123      Eric 	if (blksz == 0)
    967  11123      Eric 		blksz = DEV_BSIZE;
    968  10980      Eric 	/* too small to contain a zpool? */
    969  10980      Eric 	if ((size < (SPA_MINDEVSIZE / blksz)) &&
    970  10980      Eric 	    (node = avl_find(r, &tmpnode, NULL)))
    971  10980      Eric 		node->rn_nozpool = B_TRUE;
    972  10980      Eric }
    973  10980      Eric 
    974  10980      Eric static void
    975  10980      Eric nozpool_all_slices(avl_tree_t *r, const char *sname)
    976  10980      Eric {
    977  10980      Eric 	char diskname[MAXNAMELEN];
    978  10980      Eric 	char *ptr;
    979  10980      Eric 	int i;
    980  10980      Eric 
    981  10980      Eric 	(void) strncpy(diskname, sname, MAXNAMELEN);
    982  10980      Eric 	if (((ptr = strrchr(diskname, 's')) == NULL) &&
    983  10980      Eric 	    ((ptr = strrchr(diskname, 'p')) == NULL))
    984  10980      Eric 		return;
    985  10980      Eric 	ptr[0] = 's';
    986  10980      Eric 	ptr[1] = '\0';
    987  10980      Eric 	for (i = 0; i < NDKMAP; i++)
    988  10980      Eric 		check_one_slice(r, diskname, i, 0, 1);
    989  10980      Eric 	ptr[0] = 'p';
    990  10980      Eric 	for (i = 0; i <= FD_NUMPART; i++)
    991  10980      Eric 		check_one_slice(r, diskname, i, 0, 1);
    992  10980      Eric }
    993  10980      Eric 
    994  10980      Eric static void
    995  10980      Eric check_slices(avl_tree_t *r, int fd, const char *sname)
    996  10980      Eric {
    997  10980      Eric 	struct extvtoc vtoc;
    998  10980      Eric 	struct dk_gpt *gpt;
    999  10980      Eric 	char diskname[MAXNAMELEN];
   1000  10980      Eric 	char *ptr;
   1001  10980      Eric 	int i;
   1002  10980      Eric 
   1003  10980      Eric 	(void) strncpy(diskname, sname, MAXNAMELEN);
   1004  10980      Eric 	if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
   1005  10980      Eric 		return;
   1006  10980      Eric 	ptr[1] = '\0';
   1007  10980      Eric 
   1008  10980      Eric 	if (read_extvtoc(fd, &vtoc) >= 0) {
   1009  10980      Eric 		for (i = 0; i < NDKMAP; i++)
   1010  10980      Eric 			check_one_slice(r, diskname, i,
   1011  10980      Eric 			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
   1012  10980      Eric 	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
   1013  10980      Eric 		/*
   1014  10980      Eric 		 * on x86 we'll still have leftover links that point
   1015  10980      Eric 		 * to slices s[9-15], so use NDKMAP instead
   1016  10980      Eric 		 */
   1017  10980      Eric 		for (i = 0; i < NDKMAP; i++)
   1018  10980      Eric 			check_one_slice(r, diskname, i,
   1019  10980      Eric 			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
   1020  10980      Eric 		/* nodes p[1-4] are never used with EFI labels */
   1021  10980      Eric 		ptr[0] = 'p';
   1022  10980      Eric 		for (i = 1; i <= FD_NUMPART; i++)
   1023  10980      Eric 			check_one_slice(r, diskname, i, 0, 1);
   1024  10980      Eric 		efi_free(gpt);
   1025  10980      Eric 	}
   1026  10980      Eric }
   1027  10980      Eric 
   1028  10980      Eric static void
   1029  10980      Eric zpool_open_func(void *arg)
   1030  10980      Eric {
   1031  10980      Eric 	rdsk_node_t *rn = arg;
   1032  10980      Eric 	struct stat64 statbuf;
   1033  10980      Eric 	nvlist_t *config;
   1034  10980      Eric 	int fd;
   1035  10980      Eric 
   1036  10980      Eric 	if (rn->rn_nozpool)
   1037  10980      Eric 		return;
   1038  10980      Eric 	if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
   1039  10980      Eric 		/* symlink to a device that's no longer there */
   1040  10980      Eric 		if (errno == ENOENT)
   1041  10980      Eric 			nozpool_all_slices(rn->rn_avl, rn->rn_name);
   1042  10980      Eric 		return;
   1043  10980      Eric 	}
   1044  10980      Eric 	/*
   1045  10980      Eric 	 * Ignore failed stats.  We only want regular
   1046  10980      Eric 	 * files, character devs and block devs.
   1047  10980      Eric 	 */
   1048  10980      Eric 	if (fstat64(fd, &statbuf) != 0 ||
   1049  10980      Eric 	    (!S_ISREG(statbuf.st_mode) &&
   1050  10980      Eric 	    !S_ISCHR(statbuf.st_mode) &&
   1051  10980      Eric 	    !S_ISBLK(statbuf.st_mode))) {
   1052  10980      Eric 		(void) close(fd);
   1053  10980      Eric 		return;
   1054  10980      Eric 	}
   1055  10980      Eric 	/* this file is too small to hold a zpool */
   1056  10980      Eric 	if (S_ISREG(statbuf.st_mode) &&
   1057  10980      Eric 	    statbuf.st_size < SPA_MINDEVSIZE) {
   1058  10980      Eric 		(void) close(fd);
   1059  10980      Eric 		return;
   1060  10980      Eric 	} else if (!S_ISREG(statbuf.st_mode)) {
   1061  10980      Eric 		/*
   1062  10980      Eric 		 * Try to read the disk label first so we don't have to
   1063  10980      Eric 		 * open a bunch of minor nodes that can't have a zpool.
   1064  10980      Eric 		 */
   1065  10980      Eric 		check_slices(rn->rn_avl, fd, rn->rn_name);
   1066  10980      Eric 	}
   1067  10980      Eric 
   1068  10980      Eric 	if ((zpool_read_label(fd, &config)) != 0) {
   1069  10980      Eric 		(void) close(fd);
   1070  10980      Eric 		(void) no_memory(rn->rn_hdl);
   1071  10980      Eric 		return;
   1072  10980      Eric 	}
   1073  10980      Eric 	(void) close(fd);
   1074  10980      Eric 
   1075  10980      Eric 
   1076  10980      Eric 	rn->rn_config = config;
   1077  10980      Eric 	if (config != NULL) {
   1078  10980      Eric 		assert(rn->rn_nozpool == B_FALSE);
   1079  10980      Eric 	}
   1080  10980      Eric }
   1081  10980      Eric 
   1082    789    ahrens /*
   1083  10830      Eric  * Given a file descriptor, clear (zero) the label information.  This function
   1084  10830      Eric  * is currently only used in the appliance stack as part of the ZFS sysevent
   1085  10830      Eric  * module.
   1086  10830      Eric  */
   1087  10830      Eric int
   1088  10830      Eric zpool_clear_label(int fd)
   1089  10830      Eric {
   1090  10830      Eric 	struct stat64 statbuf;
   1091  10830      Eric 	int l;
   1092  10830      Eric 	vdev_label_t *label;
   1093  10830      Eric 	uint64_t size;
   1094  10830      Eric 
   1095  10830      Eric 	if (fstat64(fd, &statbuf) == -1)
   1096  10830      Eric 		return (0);
   1097  10830      Eric 	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
   1098  10830      Eric 
   1099  10830      Eric 	if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
   1100  10830      Eric 		return (-1);
   1101  10830      Eric 
   1102  10830      Eric 	for (l = 0; l < VDEV_LABELS; l++) {
   1103  10830      Eric 		if (pwrite64(fd, label, sizeof (vdev_label_t),
   1104  10830      Eric 		    label_offset(size, l)) != sizeof (vdev_label_t))
   1105  10830      Eric 			return (-1);
   1106  10830      Eric 	}
   1107  10830      Eric 
   1108  10830      Eric 	free(label);
   1109  10830      Eric 	return (0);
   1110  10830      Eric }
   1111  10830      Eric 
   1112  10830      Eric /*
   1113    789    ahrens  * Given a list of directories to search, find all pools stored on disk.  This
   1114    789    ahrens  * includes partial pools which are not available to import.  If no args are
   1115    789    ahrens  * given (argc is 0), then the default directory (/dev/dsk) is searched.
   1116   6807  ck153898  * poolname or guid (but not both) are provided by the caller when trying
   1117   6807  ck153898  * to import a specific pool.
   1118    789    ahrens  */
   1119   6807  ck153898 static nvlist_t *
   1120   6807  ck153898 zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
   1121   6807  ck153898     boolean_t active_ok, char *poolname, uint64_t guid)
   1122    789    ahrens {
   1123    789    ahrens 	int i;
   1124   4055  eschrock 	DIR *dirp = NULL;
   1125    789    ahrens 	struct dirent64 *dp;
   1126    789    ahrens 	char path[MAXPATHLEN];
   1127   6376   jwadams 	char *end;
   1128   6376   jwadams 	size_t pathleft;
   1129  10980      Eric 	nvlist_t *ret = NULL;
   1130    789    ahrens 	static char *default_dir = "/dev/dsk";
   1131    789    ahrens 	pool_list_t pools = { 0 };
   1132   2082  eschrock 	pool_entry_t *pe, *penext;
   1133   2082  eschrock 	vdev_entry_t *ve, *venext;
   1134   2082  eschrock 	config_entry_t *ce, *cenext;
   1135   2082  eschrock 	name_entry_t *ne, *nenext;
   1136  10980      Eric 	avl_tree_t slice_cache;
   1137  10980      Eric 	rdsk_node_t *slice;
   1138  10980      Eric 	void *cookie;
   1139   6807  ck153898 
   1140   6807  ck153898 	verify(poolname == NULL || guid == 0);
   1141    789    ahrens 
   1142    789    ahrens 	if (argc == 0) {
   1143    789    ahrens 		argc = 1;
   1144    789    ahrens 		argv = &default_dir;
   1145    789    ahrens 	}
   1146    789    ahrens 
   1147    789    ahrens 	/*
   1148    789    ahrens 	 * Go through and read the label configuration information from every
   1149    789    ahrens 	 * possible device, organizing the information according to pool GUID
   1150    789    ahrens 	 * and toplevel GUID.
   1151    789    ahrens 	 */
   1152    789    ahrens 	for (i = 0; i < argc; i++) {
   1153  10980      Eric 		tpool_t *t;
   1154   6376   jwadams 		char *rdsk;
   1155   6376   jwadams 		int dfd;
   1156   6376   jwadams 
   1157   6376   jwadams 		/* use realpath to normalize the path */
   1158   6376   jwadams 		if (realpath(argv[i], path) == 0) {
   1159   3237     lling 			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
   1160   2082  eschrock 			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
   1161    789    ahrens 			    argv[i]);
   1162   2082  eschrock 			goto error;
   1163    789    ahrens 		}
   1164   6376   jwadams 		end = &path[strlen(path)];
   1165   6376   jwadams 		*end++ = '/';
   1166   6376   jwadams 		*end = 0;
   1167   6376   jwadams 		pathleft = &path[sizeof (path)] - end;
   1168    789    ahrens 
   1169   6376   jwadams 		/*
   1170   6376   jwadams 		 * Using raw devices instead of block devices when we're
   1171   6376   jwadams 		 * reading the labels skips a bunch of slow operations during
   1172   6376   jwadams 		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
   1173   6376   jwadams 		 */
   1174   6376   jwadams 		if (strcmp(path, "/dev/dsk/") == 0)
   1175   6376   jwadams 			rdsk = "/dev/rdsk/";
   1176   6376   jwadams 		else
   1177   6376   jwadams 			rdsk = path;
   1178   6376   jwadams 
   1179   6376   jwadams 		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
   1180   6376   jwadams 		    (dirp = fdopendir(dfd)) == NULL) {
   1181   2082  eschrock 			zfs_error_aux(hdl, strerror(errno));
   1182   3237     lling 			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
   1183   2082  eschrock 			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
   1184   6376   jwadams 			    rdsk);
   1185   2082  eschrock 			goto error;
   1186    789    ahrens 		}
   1187    789    ahrens 
   1188  10980      Eric 		avl_create(&slice_cache, slice_cache_compare,
   1189  10980      Eric 		    sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
   1190    789    ahrens 		/*
   1191    789    ahrens 		 * This is not MT-safe, but we have no MT consumers of libzfs
   1192    789    ahrens 		 */
   1193    789    ahrens 		while ((dp = readdir64(dirp)) != NULL) {
   1194   6376   jwadams 			const char *name = dp->d_name;
   1195   6376   jwadams 			if (name[0] == '.' &&
   1196   6376   jwadams 			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
   1197   6376   jwadams 				continue;
   1198    789    ahrens 
   1199  10980      Eric 			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
   1200  10980      Eric 			slice->rn_name = zfs_strdup(hdl, name);
   1201  10980      Eric 			slice->rn_avl = &slice_cache;
   1202  10980      Eric 			slice->rn_dfd = dfd;
   1203  10980      Eric 			slice->rn_hdl = hdl;
   1204  10980      Eric 			slice->rn_nozpool = B_FALSE;
   1205  10980      Eric 			avl_add(&slice_cache, slice);
   1206  10980      Eric 		}
   1207  10980      Eric 		/*
   1208  10980      Eric 		 * create a thread pool to do all of this in parallel;
   1209  10980      Eric 		 * rn_nozpool is not protected, so this is racy in that
   1210  10980      Eric 		 * multiple tasks could decide that the same slice can
   1211  10980      Eric 		 * not hold a zpool, which is benign.  Also choose
   1212  10980      Eric 		 * double the number of processors; we hold a lot of
   1213  10980      Eric 		 * locks in the kernel, so going beyond this doesn't
   1214  10980      Eric 		 * buy us much.
   1215  10980      Eric 		 */
   1216  10980      Eric 		t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
   1217  10980      Eric 		    0, NULL);
   1218  10980      Eric 		for (slice = avl_first(&slice_cache); slice;
   1219  10980      Eric 		    (slice = avl_walk(&slice_cache, slice,
   1220  10980      Eric 		    AVL_AFTER)))
   1221  10980      Eric 			(void) tpool_dispatch(t, zpool_open_func, slice);
   1222  10980      Eric 		tpool_wait(t);
   1223  10980      Eric 		tpool_destroy(t);
   1224    789    ahrens 
   1225  10980      Eric 		cookie = NULL;
   1226  10980      Eric 		while ((slice = avl_destroy_nodes(&slice_cache,
   1227  10980      Eric 		    &cookie)) != NULL) {
   1228  10980      Eric 			if (slice->rn_config != NULL) {
   1229  10980      Eric 				nvlist_t *config = slice->rn_config;
   1230   6807  ck153898 				boolean_t matched = B_TRUE;
   1231   6807  ck153898 
   1232   6807  ck153898 				if (poolname != NULL) {
   1233   6807  ck153898 					char *pname;
   1234   6895  ck153898 
   1235   6895  ck153898 					matched = nvlist_lookup_string(config,
   1236   6807  ck153898 					    ZPOOL_CONFIG_POOL_NAME,
   1237   6895  ck153898 					    &pname) == 0 &&
   1238   6895  ck153898 					    strcmp(poolname, pname) == 0;
   1239   6807  ck153898 				} else if (guid != 0) {
   1240   6807  ck153898 					uint64_t this_guid;
   1241   7043  ck153898 
   1242   7043  ck153898 					matched = nvlist_lookup_uint64(config,
   1243   6807  ck153898 					    ZPOOL_CONFIG_POOL_GUID,
   1244   7043  ck153898 					    &this_guid) == 0 &&
   1245   7043  ck153898 					    guid == this_guid;
   1246   6807  ck153898 				}
   1247   6807  ck153898 				if (!matched) {
   1248   6807  ck153898 					nvlist_free(config);
   1249   6807  ck153898 					config = NULL;
   1250   6807  ck153898 					continue;
   1251   6807  ck153898 				}
   1252   6376   jwadams 				/* use the non-raw path for the config */
   1253  10980      Eric 				(void) strlcpy(end, slice->rn_name, pathleft);
   1254   2082  eschrock 				if (add_config(hdl, &pools, path, config) != 0)
   1255   2082  eschrock 					goto error;
   1256   6376   jwadams 			}
   1257  10980      Eric 			free(slice->rn_name);
   1258  10980      Eric 			free(slice);
   1259    789    ahrens 		}
   1260  10980      Eric 		avl_destroy(&slice_cache);
   1261   4055  eschrock 
   1262   4055  eschrock 		(void) closedir(dirp);
   1263   4055  eschrock 		dirp = NULL;
   1264    789    ahrens 	}
   1265    789    ahrens 
   1266   5994  ck153898 	ret = get_configs(hdl, &pools, active_ok);
   1267   2082  eschrock 
   1268   2082  eschrock error:
   1269   2082  eschrock 	for (pe = pools.pools; pe != NULL; pe = penext) {
   1270   2082  eschrock 		penext = pe->pe_next;
   1271   2082  eschrock 		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
   1272   2082  eschrock 			venext = ve->ve_next;
   1273   2082  eschrock 			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
   1274   2082  eschrock 				cenext = ce->ce_next;
   1275   2082  eschrock 				if (ce->ce_config)
   1276   2082  eschrock 					nvlist_free(ce->ce_config);
   1277   2082  eschrock 				free(ce);
   1278   2082  eschrock 			}
   1279   2082  eschrock 			free(ve);
   1280   2082  eschrock 		}
   1281   2082  eschrock 		free(pe);
   1282   2082  eschrock 	}
   1283   2082  eschrock 
   1284   2082  eschrock 	for (ne = pools.names; ne != NULL; ne = nenext) {
   1285   2082  eschrock 		nenext = ne->ne_next;
   1286   2082  eschrock 		if (ne->ne_name)
   1287   2082  eschrock 			free(ne->ne_name);
   1288   2082  eschrock 		free(ne);
   1289   2082  eschrock 	}
   1290   2082  eschrock 
   1291   4055  eschrock 	if (dirp)
   1292   4055  eschrock 		(void) closedir(dirp);
   1293    789    ahrens 
   1294    789    ahrens 	return (ret);
   1295    789    ahrens }
   1296   5363  eschrock 
   1297   6807  ck153898 nvlist_t *
   1298   6807  ck153898 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
   1299   6807  ck153898 {
   1300   6807  ck153898 	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
   1301   6807  ck153898 }
   1302   6807  ck153898 
   1303   6807  ck153898 nvlist_t *
   1304   6807  ck153898 zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
   1305   6807  ck153898     char *pool)
   1306   6807  ck153898 {
   1307   6807  ck153898 	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
   1308   6807  ck153898 }
   1309   6807  ck153898 
   1310   6807  ck153898 nvlist_t *
   1311   6807  ck153898 zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
   1312   6807  ck153898     uint64_t guid)
   1313   6807  ck153898 {
   1314   6807  ck153898 	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
   1315   6807  ck153898 }
   1316   6807  ck153898 
   1317   6807  ck153898 nvlist_t *
   1318   6807  ck153898 zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
   1319   6807  ck153898 {
   1320   6807  ck153898 	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
   1321   6807  ck153898 }
   1322   6807  ck153898 
   1323   5363  eschrock /*
   1324   5363  eschrock  * Given a cache file, return the contents as a list of importable pools.
   1325   6807  ck153898  * poolname or guid (but not both) are provided by the caller when trying
   1326   6807  ck153898  * to import a specific pool.
   1327   5363  eschrock  */
   1328   5363  eschrock nvlist_t *
   1329   5994  ck153898 zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
   1330   6957  ck153898     char *poolname, uint64_t guid)
   1331   5363  eschrock {
   1332   5363  eschrock 	char *buf;
   1333   5363  eschrock 	int fd;
   1334   5363  eschrock 	struct stat64 statbuf;
   1335   5363  eschrock 	nvlist_t *raw, *src, *dst;
   1336   5363  eschrock 	nvlist_t *pools;
   1337   5363  eschrock 	nvpair_t *elem;
   1338   5363  eschrock 	char *name;
   1339   6807  ck153898 	uint64_t this_guid;
   1340   5363  eschrock 	boolean_t active;
   1341   6807  ck153898 
   1342   6807  ck153898 	verify(poolname == NULL || guid == 0);
   1343   5363  eschrock 
   1344   5363  eschrock 	if ((fd = open(cachefile, O_RDONLY)) < 0) {
   1345   5363  eschrock 		zfs_error_aux(hdl, "%s", strerror(errno));
   1346   5363  eschrock 		(void) zfs_error(hdl, EZFS_BADCACHE,
   1347   5363  eschrock 		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
   1348   5363  eschrock 		return (NULL);
   1349   5363  eschrock 	}
   1350   5363  eschrock 
   1351   5363  eschrock 	if (fstat64(fd, &statbuf) != 0) {
   1352   5363  eschrock 		zfs_error_aux(hdl, "%s", strerror(errno));
   1353   5363  eschrock 		(void) close(fd);
   1354   5363  eschrock 		(void) zfs_error(hdl, EZFS_BADCACHE,
   1355   5363  eschrock 		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
   1356   5363  eschrock 		return (NULL);
   1357   5363  eschrock 	}
   1358   5363  eschrock 
   1359   5363  eschrock 	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
   1360   5363  eschrock 		(void) close(fd);
   1361   5363  eschrock 		return (NULL);
   1362   5363  eschrock 	}
   1363   5363  eschrock 
   1364   5363  eschrock 	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
   1365   5363  eschrock 		(void) close(fd);
   1366   5363  eschrock 		free(buf);
   1367   5363  eschrock 		(void) zfs_error(hdl, EZFS_BADCACHE,
   1368   5363  eschrock 		    dgettext(TEXT_DOMAIN,
   1369   5363  eschrock 		    "failed to read cache file contents"));
   1370   5363  eschrock 		return (NULL);
   1371   5363  eschrock 	}
   1372   5363  eschrock 
   1373   5363  eschrock 	(void) close(fd);
   1374   5363  eschrock 
   1375   5363  eschrock 	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
   1376   5363  eschrock 		free(buf);
   1377   5363  eschrock 		(void) zfs_error(hdl, EZFS_BADCACHE,
   1378   5363  eschrock 		    dgettext(TEXT_DOMAIN,
   1379   5363  eschrock 		    "invalid or corrupt cache file contents"));
   1380   5363  eschrock 		return (NULL);
   1381   5363  eschrock 	}
   1382   5363  eschrock 
   1383   5363  eschrock 	free(buf);
   1384   5363  eschrock 
   1385   5363  eschrock 	/*
   1386   5363  eschrock 	 * Go through and get the current state of the pools and refresh their
   1387   5363  eschrock 	 * state.
   1388   5363  eschrock 	 */
   1389   5363  eschrock 	if (nvlist_alloc(&pools, 0, 0) != 0) {
   1390   5363  eschrock 		(void) no_memory(hdl);
   1391   5363  eschrock 		nvlist_free(raw);
   1392   5363  eschrock 		return (NULL);
   1393   5363  eschrock 	}
   1394   5363  eschrock 
   1395   5363  eschrock 	elem = NULL;
   1396   5363  eschrock 	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
   1397   5363  eschrock 		verify(nvpair_value_nvlist(elem, &src) == 0);
   1398   5363  eschrock 
   1399   5363  eschrock 		verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
   1400   5363  eschrock 		    &name) == 0);
   1401   6807  ck153898 		if (poolname != NULL && strcmp(poolname, name) != 0)
   1402   6807  ck153898 			continue;
   1403   6807  ck153898 
   1404   5363  eschrock 		verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
   1405   6807  ck153898 		    &this_guid) == 0);
   1406   6807  ck153898 		if (guid != 0) {
   1407   6807  ck153898 			verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
   1408   6807  ck153898 			    &this_guid) == 0);
   1409   6807  ck153898 			if (guid != this_guid)
   1410   6807  ck153898 				continue;
   1411   6807  ck153898 		}
   1412   5363  eschrock 
   1413   6957  ck153898 		if (pool_active(hdl, name, this_guid, &active) != 0) {
   1414   6957  ck153898 			nvlist_free(raw);
   1415   6957  ck153898 			nvlist_free(pools);
   1416   6957  ck153898 			return (NULL);
   1417   6957  ck153898 		}
   1418   5994  ck153898 
   1419   6957  ck153898 		if (active)
   1420   6957  ck153898 			continue;
   1421   5994  ck153898 
   1422   6957  ck153898 		if ((dst = refresh_config(hdl, src)) == NULL) {
   1423   6957  ck153898 			nvlist_free(raw);
   1424   6957  ck153898 			nvlist_free(pools);
   1425   6957  ck153898 			return (NULL);
   1426   6957  ck153898 		}
   1427   5994  ck153898 
   1428   6957  ck153898 		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
   1429   6957  ck153898 			(void) no_memory(hdl);
   1430   5994  ck153898 			nvlist_free(dst);
   1431   6957  ck153898 			nvlist_free(raw);
   1432   6957  ck153898 			nvlist_free(pools);
   1433   6957  ck153898 			return (NULL);
   1434   5363  eschrock 		}
   1435   6957  ck153898 		nvlist_free(dst);
   1436   5363  eschrock 	}
   1437   5363  eschrock 
   1438   5363  eschrock 	nvlist_free(raw);
   1439   5363  eschrock 	return (pools);
   1440   5363  eschrock }
   1441   5363  eschrock 
   1442    789    ahrens 
   1443   2082  eschrock boolean_t
   1444    789    ahrens find_guid(nvlist_t *nv, uint64_t guid)
   1445    789    ahrens {
   1446    789    ahrens 	uint64_t tmp;
   1447    789    ahrens 	nvlist_t **child;
   1448    789    ahrens 	uint_t c, children;
   1449    789    ahrens 
   1450    789    ahrens 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
   1451    789    ahrens 	if (tmp == guid)
   1452   2082  eschrock 		return (B_TRUE);
   1453    789    ahrens 
   1454    789    ahrens 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
   1455    789    ahrens 	    &child, &children) == 0) {
   1456    789    ahrens 		for (c = 0; c < children; c++)
   1457    789    ahrens 			if (find_guid(child[c], guid))
   1458   2082  eschrock 				return (B_TRUE);
   1459    789    ahrens 	}
   1460    789    ahrens 
   1461   2082  eschrock 	return (B_FALSE);
   1462   2082  eschrock }
   1463   2082  eschrock 
   1464   5450   brendan typedef struct aux_cbdata {
   1465   5450   brendan 	const char	*cb_type;
   1466   2082  eschrock 	uint64_t	cb_guid;
   1467   2082  eschrock 	zpool_handle_t	*cb_zhp;
   1468   5450   brendan } aux_cbdata_t;
   1469   2082  eschrock 
   1470   2082  eschrock static int
   1471   5450   brendan find_aux(zpool_handle_t *zhp, void *data)
   1472   2082  eschrock {
   1473   5450   brendan 	aux_cbdata_t *cbp = data;
   1474   5450   brendan 	nvlist_t **list;
   1475   5450   brendan 	uint_t i, count;
   1476   2082  eschrock 	uint64_t guid;
   1477   2082  eschrock 	nvlist_t *nvroot;
   1478   2082  eschrock 
   1479   2082  eschrock 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
   1480   2082  eschrock 	    &nvroot) == 0);
   1481   2082  eschrock 
   1482   5450   brendan 	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
   1483   5450   brendan 	    &list, &count) == 0) {
   1484   5450   brendan 		for (i = 0; i < count; i++) {
   1485   5450   brendan 			verify(nvlist_lookup_uint64(list[i],
   1486   2082  eschrock 			    ZPOOL_CONFIG_GUID, &guid) == 0);
   1487   2082  eschrock 			if (guid == cbp->cb_guid) {
   1488   2082  eschrock 				cbp->cb_zhp = zhp;
   1489   2082  eschrock 				return (1);
   1490   2082  eschrock 			}
   1491   2082  eschrock 		}
   1492   2082  eschrock 	}
   1493   2082  eschrock 
   1494   2082  eschrock 	zpool_close(zhp);
   1495   2082  eschrock 	return (0);
   1496    789    ahrens }
   1497    789    ahrens 
   1498    789    ahrens /*
   1499   2082  eschrock  * Determines if the pool is in use.  If so, it returns true and the state of
   1500    789    ahrens  * the pool as well as the name of the pool.  Both strings are allocated and
   1501    789    ahrens  * must be freed by the caller.
   1502    789    ahrens  */
   1503    789    ahrens int
   1504   2082  eschrock zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
   1505   2082  eschrock     boolean_t *inuse)
   1506    789    ahrens {
   1507    789    ahrens 	nvlist_t *config;
   1508    789    ahrens 	char *name;
   1509   2082  eschrock 	boolean_t ret;
   1510    789    ahrens 	uint64_t guid, vdev_guid;
   1511    789    ahrens 	zpool_handle_t *zhp;
   1512    789    ahrens 	nvlist_t *pool_config;
   1513   3377  eschrock 	uint64_t stateval, isspare;
   1514   5450   brendan 	aux_cbdata_t cb = { 0 };
   1515   2142  eschrock 	boolean_t isactive;
   1516    789    ahrens 
   1517   2082  eschrock 	*inuse = B_FALSE;
   1518    789    ahrens 
   1519   2082  eschrock 	if (zpool_read_label(fd, &config) != 0) {
   1520   2082  eschrock 		(void) no_memory(hdl);
   1521   2082  eschrock 		return (-1);
   1522   2082  eschrock 	}
   1523   2082  eschrock 
   1524   2082  eschrock 	if (config == NULL)
   1525   2082  eschrock 		return (0);
   1526   2082  eschrock 
   1527    789    ahrens 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
   1528   1352  eschrock 	    &stateval) == 0);
   1529    789    ahrens 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
   1530    789    ahrens 	    &vdev_guid) == 0);
   1531    789    ahrens 
   1532   5450   brendan 	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
   1533   2082  eschrock 		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
   1534   2082  eschrock 		    &name) == 0);
   1535   2082  eschrock 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
   1536   2082  eschrock 		    &guid) == 0);
   1537   2082  eschrock 	}
   1538   2082  eschrock 
   1539   1352  eschrock 	switch (stateval) {
   1540    789    ahrens 	case POOL_STATE_EXPORTED:
   1541   2082  eschrock 		ret = B_TRUE;
   1542    789    ahrens 		break;
   1543    789    ahrens 
   1544    789    ahrens 	case POOL_STATE_ACTIVE:
   1545    789    ahrens 		/*
   1546    789    ahrens 		 * For an active pool, we have to determine if it's really part
   1547   1760  eschrock 		 * of a currently active pool (in which case the pool will exist
   1548   1760  eschrock 		 * and the guid will be the same), or whether it's part of an
   1549   1760  eschrock 		 * active pool that was disconnected without being explicitly
   1550   1760  eschrock 		 * exported.
   1551    789    ahrens 		 */
   1552   2142  eschrock 		if (pool_active(hdl, name, guid, &isactive) != 0) {
   1553   2142  eschrock 			nvlist_free(config);
   1554   2142  eschrock 			return (-1);
   1555   2142  eschrock 		}
   1556   2142  eschrock 
   1557   2142  eschrock 		if (isactive) {
   1558    789    ahrens 			/*
   1559    789    ahrens 			 * Because the device may have been removed while
   1560    789    ahrens 			 * offlined, we only report it as active if the vdev is
   1561    789    ahrens 			 * still present in the config.  Otherwise, pretend like
   1562    789    ahrens 			 * it's not in use.
   1563    789    ahrens 			 */
   1564   2082  eschrock 			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
   1565    952  eschrock 			    (pool_config = zpool_get_config(zhp, NULL))
   1566    952  eschrock 			    != NULL) {
   1567    789    ahrens 				nvlist_t *nvroot;
   1568    789    ahrens 
   1569    789    ahrens 				verify(nvlist_lookup_nvlist(pool_config,
   1570    789    ahrens 				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
   1571   1352  eschrock 				ret = find_guid(nvroot, vdev_guid);
   1572    789    ahrens 			} else {
   1573   2082  eschrock 				ret = B_FALSE;
   1574    789    ahrens 			}
   1575   2082  eschrock 
   1576   3377  eschrock 			/*
   1577   3377  eschrock 			 * If this is an active spare within another pool, we
   1578   3377  eschrock 			 * treat it like an unused hot spare.  This allows the
   1579   3377  eschrock 			 * user to create a pool with a hot spare that currently
   1580   3377  eschrock 			 * in use within another pool.  Since we return B_TRUE,
   1581   3377  eschrock 			 * libdiskmgt will continue to prevent generic consumers
   1582   3377  eschrock 			 * from using the device.
   1583   3377  eschrock 			 */
   1584   3377  eschrock 			if (ret && nvlist_lookup_uint64(config,
   1585   3377  eschrock 			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
   1586   3377  eschrock 				stateval = POOL_STATE_SPARE;
   1587   3377  eschrock 
   1588   2082  eschrock 			if (zhp != NULL)
   1589   2082  eschrock 				zpool_close(zhp);
   1590    789    ahrens 		} else {
   1591   1352  eschrock 			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
   1592   2082  eschrock 			ret = B_TRUE;
   1593   2082  eschrock 		}
   1594   2082  eschrock 		break;
   1595   2082  eschrock 
   1596   2082  eschrock 	case POOL_STATE_SPARE:
   1597   2082  eschrock 		/*
   1598   2082  eschrock 		 * For a hot spare, it can be either definitively in use, or
   1599   2082  eschrock 		 * potentially active.  To determine if it's in use, we iterate
   1600   2082  eschrock 		 * over all pools in the system and search for one with a spare
   1601   2082  eschrock 		 * with a matching guid.
   1602   2082  eschrock 		 *
   1603   2082  eschrock 		 * Due to the shared nature of spares, we don't actually report
   1604   2082  eschrock 		 * the potentially active case as in use.  This means the user
   1605   2082  eschrock 		 * can freely create pools on the hot spares of exported pools,
   1606   2082  eschrock 		 * but to do otherwise makes the resulting code complicated, and
   1607   2082  eschrock 		 * we end up having to deal with this case anyway.
   1608   2082  eschrock 		 */
   1609   2082  eschrock 		cb.cb_zhp = NULL;
   1610   2082  eschrock 		cb.cb_guid = vdev_guid;
   1611   5450   brendan 		cb.cb_type = ZPOOL_CONFIG_SPARES;
   1612   5450   brendan 		if (zpool_iter(hdl, find_aux, &cb) == 1) {
   1613   5450   brendan 			name = (char *)zpool_get_name(cb.cb_zhp);
   1614   5450   brendan 			ret = TRUE;
   1615   5450   brendan 		} else {
   1616   5450   brendan 			ret = FALSE;
   1617   5450   brendan 		}
   1618   5450   brendan 		break;
   1619   5450   brendan 
   1620   5450   brendan 	case POOL_STATE_L2CACHE:
   1621   5450   brendan 
   1622   5450   brendan 		/*
   1623   5450   brendan 		 * Check if any pool is currently using this l2cache device.
   1624   5450   brendan 		 */
   1625   5450   brendan 		cb.cb_zhp = NULL;
   1626   5450   brendan 		cb.cb_guid = vdev_guid;
   1627   5450   brendan 		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
   1628   5450   brendan 		if (zpool_iter(hdl, find_aux, &cb) == 1) {
   1629   2082  eschrock 			name = (char *)zpool_get_name(cb.cb_zhp);
   1630    789    ahrens 			ret = TRUE;
   1631   2082  eschrock 		} else {
   1632   2082  eschrock 			ret = FALSE;
   1633    789    ahrens 		}
   1634    789    ahrens 		break;
   1635    789    ahrens 
   1636    789    ahrens 	default:
   1637   2082  eschrock 		ret = B_FALSE;
   1638    789    ahrens 	}
   1639    789    ahrens 
   1640   1352  eschrock 
   1641   1352  eschrock 	if (ret) {
   1642   2082  eschrock 		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
   1643   5501  eschrock 			if (cb.cb_zhp)
   1644   5501  eschrock 				zpool_close(cb.cb_zhp);
   1645   2082  eschrock 			nvlist_free(config);
   1646   2082  eschrock 			return (-1);
   1647   2082  eschrock 		}
   1648   1352  eschrock 		*state = (pool_state_t)stateval;
   1649   1352  eschrock 	}
   1650   1352  eschrock 
   1651   2082  eschrock 	if (cb.cb_zhp)
   1652   2082  eschrock 		zpool_close(cb.cb_zhp);
   1653   2082  eschrock 
   1654    789    ahrens 	nvlist_free(config);
   1655   2082  eschrock 	*inuse = ret;
   1656   2082  eschrock 	return (0);
   1657    789    ahrens }
   1658