Home | History | Annotate | Download | only in common
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22  10151    George  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens /*
     27    789    ahrens  * This file contains the functions which analyze the status of a pool.  This
     28    789    ahrens  * includes both the status of an active pool and the status of exported
     29    789    ahrens  * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
     30    789    ahrens  * the pool.  This status is independent (to a certain degree) from the state of
     31   4451  eschrock  * the pool.  A pool's state describes only whether or not it is capable of
     32    789    ahrens  * providing the necessary fault tolerance for data.  The status describes the
     33    789    ahrens  * overall status of devices.  A pool that is online can still have a device
     34    789    ahrens  * that is experiencing errors.
     35    789    ahrens  *
     36    789    ahrens  * Only a subset of the possible faults can be detected using 'zpool status',
     37    789    ahrens  * and not all possible errors correspond to an FMA message ID.  The explanation
     38    789    ahrens  * is left up to the caller, depending on whether it is a live pool or an
     39    789    ahrens  * import.
     40    789    ahrens  */
     41    789    ahrens 
     42    789    ahrens #include <libzfs.h>
     43    789    ahrens #include <string.h>
     44   3975  ek110237 #include <unistd.h>
     45    789    ahrens #include "libzfs_impl.h"
     46    789    ahrens 
     47    789    ahrens /*
     48   4451  eschrock  * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
     49    789    ahrens  * in libzfs.h.  Note that there are some status results which go past the end
     50    789    ahrens  * of this table, and hence have no associated message ID.
     51    789    ahrens  */
     52   3975  ek110237 static char *zfs_msgid_table[] = {
     53    789    ahrens 	"ZFS-8000-14",
     54    789    ahrens 	"ZFS-8000-2Q",
     55    789    ahrens 	"ZFS-8000-3C",
     56    789    ahrens 	"ZFS-8000-4J",
     57    789    ahrens 	"ZFS-8000-5E",
     58    789    ahrens 	"ZFS-8000-6X",
     59    789    ahrens 	"ZFS-8000-72",
     60    789    ahrens 	"ZFS-8000-8A",
     61    789    ahrens 	"ZFS-8000-9P",
     62   3975  ek110237 	"ZFS-8000-A5",
     63   6523  ek110237 	"ZFS-8000-EY",
     64   6523  ek110237 	"ZFS-8000-HC",
     65   7294    perrin 	"ZFS-8000-JQ",
     66   7294    perrin 	"ZFS-8000-K4",
     67   1544  eschrock };
     68   1544  eschrock 
     69   3975  ek110237 #define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
     70    789    ahrens 
     71    789    ahrens /* ARGSUSED */
     72    789    ahrens static int
     73    789    ahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
     74    789    ahrens {
     75    789    ahrens 	return (state == VDEV_STATE_CANT_OPEN &&
     76    789    ahrens 	    aux == VDEV_AUX_OPEN_FAILED);
     77    789    ahrens }
     78    789    ahrens 
     79    789    ahrens /* ARGSUSED */
     80    789    ahrens static int
     81   4451  eschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs)
     82   4451  eschrock {
     83   4451  eschrock 	return (state == VDEV_STATE_FAULTED);
     84   4451  eschrock }
     85   4451  eschrock 
     86   4451  eschrock /* ARGSUSED */
     87   4451  eschrock static int
     88    789    ahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
     89    789    ahrens {
     90   4451  eschrock 	return (state == VDEV_STATE_DEGRADED || errs != 0);
     91    789    ahrens }
     92    789    ahrens 
     93    789    ahrens /* ARGSUSED */
     94    789    ahrens static int
     95    789    ahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
     96    789    ahrens {
     97    789    ahrens 	return (state == VDEV_STATE_CANT_OPEN);
     98    789    ahrens }
     99    789    ahrens 
    100    789    ahrens /* ARGSUSED */
    101    789    ahrens static int
    102    789    ahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
    103    789    ahrens {
    104    789    ahrens 	return (state == VDEV_STATE_OFFLINE);
    105  10151    George }
    106  10151    George 
    107  10151    George /* ARGSUSED */
    108  10151    George static int
    109  10151    George vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
    110  10151    George {
    111  10151    George 	return (state == VDEV_STATE_REMOVED);
    112    789    ahrens }
    113    789    ahrens 
    114    789    ahrens /*
    115    789    ahrens  * Detect if any leaf devices that have seen errors or could not be opened.
    116    789    ahrens  */
    117   2082  eschrock static boolean_t
    118    789    ahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
    119    789    ahrens {
    120    789    ahrens 	nvlist_t **child;
    121    789    ahrens 	vdev_stat_t *vs;
    122    789    ahrens 	uint_t c, children;
    123    789    ahrens 	char *type;
    124    789    ahrens 
    125    789    ahrens 	/*
    126    789    ahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
    127    789    ahrens 	 * the process of repairing any such errors, and don't want to call them
    128    789    ahrens 	 * out again.  We'll pick up the fact that a resilver is happening
    129    789    ahrens 	 * later.
    130    789    ahrens 	 */
    131    789    ahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
    132    789    ahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
    133   2082  eschrock 		return (B_FALSE);
    134    789    ahrens 
    135    789    ahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
    136    789    ahrens 	    &children) == 0) {
    137    789    ahrens 		for (c = 0; c < children; c++)
    138    789    ahrens 			if (find_vdev_problem(child[c], func))
    139   2082  eschrock 				return (B_TRUE);
    140    789    ahrens 	} else {
    141    789    ahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
    142    789    ahrens 		    (uint64_t **)&vs, &c) == 0);
    143    789    ahrens 
    144    789    ahrens 		if (func(vs->vs_state, vs->vs_aux,
    145    789    ahrens 		    vs->vs_read_errors +
    146    789    ahrens 		    vs->vs_write_errors +
    147    789    ahrens 		    vs->vs_checksum_errors))
    148   2082  eschrock 			return (B_TRUE);
    149    789    ahrens 	}
    150    789    ahrens 
    151   2082  eschrock 	return (B_FALSE);
    152    789    ahrens }
    153    789    ahrens 
    154    789    ahrens /*
    155    789    ahrens  * Active pool health status.
    156    789    ahrens  *
    157    789    ahrens  * To determine the status for a pool, we make several passes over the config,
    158    789    ahrens  * picking the most egregious error we find.  In order of importance, we do the
    159    789    ahrens  * following:
    160    789    ahrens  *
    161    789    ahrens  *	- Check for a complete and valid configuration
    162   4451  eschrock  *	- Look for any faulted or missing devices in a non-replicated config
    163   1544  eschrock  *	- Check for any data errors
    164   4451  eschrock  *	- Check for any faulted or missing devices in a replicated config
    165    789    ahrens  *	- Look for any devices showing errors
    166    789    ahrens  *	- Check for any resilvering devices
    167    789    ahrens  *
    168    789    ahrens  * There can obviously be multiple errors within a single pool, so this routine
    169    789    ahrens  * only picks the most damaging of all the current errors to report.
    170    789    ahrens  */
    171    789    ahrens static zpool_status_t
    172   7754      Jeff check_status(nvlist_t *config, boolean_t isimport)
    173    789    ahrens {
    174    789    ahrens 	nvlist_t *nvroot;
    175    789    ahrens 	vdev_stat_t *vs;
    176    789    ahrens 	uint_t vsc;
    177   1544  eschrock 	uint64_t nerr;
    178   1760  eschrock 	uint64_t version;
    179   3975  ek110237 	uint64_t stateval;
    180   7754      Jeff 	uint64_t suspended;
    181   3975  ek110237 	uint64_t hostid = 0;
    182    789    ahrens 
    183   1760  eschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
    184   1760  eschrock 	    &version) == 0);
    185    789    ahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
    186    789    ahrens 	    &nvroot) == 0);
    187    789    ahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
    188    789    ahrens 	    (uint64_t **)&vs, &vsc) == 0);
    189   3975  ek110237 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
    190   3975  ek110237 	    &stateval) == 0);
    191   3975  ek110237 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
    192   3975  ek110237 
    193   3975  ek110237 	/*
    194   3975  ek110237 	 * Pool last accessed by another system.
    195   3975  ek110237 	 */
    196   3975  ek110237 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
    197   3975  ek110237 	    stateval == POOL_STATE_ACTIVE)
    198   3975  ek110237 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
    199   1760  eschrock 
    200   1760  eschrock 	/*
    201   1760  eschrock 	 * Newer on-disk version.
    202   1760  eschrock 	 */
    203   1760  eschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    204   1760  eschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
    205   1760  eschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
    206    789    ahrens 
    207    789    ahrens 	/*
    208    789    ahrens 	 * Check that the config is complete.
    209    789    ahrens 	 */
    210    789    ahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    211   1544  eschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
    212    789    ahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
    213   6523  ek110237 
    214   6523  ek110237 	/*
    215   7754      Jeff 	 * Check whether the pool has suspended due to failed I/O.
    216   6523  ek110237 	 */
    217   7754      Jeff 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
    218   7754      Jeff 	    &suspended) == 0) {
    219   7754      Jeff 		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
    220   6523  ek110237 			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
    221   7754      Jeff 		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
    222   6523  ek110237 	}
    223   1544  eschrock 
    224   1544  eschrock 	/*
    225   7294    perrin 	 * Could not read a log.
    226   7294    perrin 	 */
    227   7294    perrin 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    228   7294    perrin 	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
    229   7294    perrin 		return (ZPOOL_STATUS_BAD_LOG);
    230   7294    perrin 	}
    231   7294    perrin 
    232   7294    perrin 	/*
    233   4451  eschrock 	 * Bad devices in non-replicated config.
    234   1544  eschrock 	 */
    235   4451  eschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    236   4451  eschrock 	    find_vdev_problem(nvroot, vdev_faulted))
    237   4451  eschrock 		return (ZPOOL_STATUS_FAULTED_DEV_NR);
    238   4451  eschrock 
    239   1544  eschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    240   1544  eschrock 	    find_vdev_problem(nvroot, vdev_missing))
    241   1544  eschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
    242   1544  eschrock 
    243   1544  eschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    244   1544  eschrock 	    find_vdev_problem(nvroot, vdev_broken))
    245   1544  eschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
    246   1544  eschrock 
    247   1544  eschrock 	/*
    248   1544  eschrock 	 * Corrupted pool metadata
    249   1544  eschrock 	 */
    250   1544  eschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
    251   1544  eschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
    252   1544  eschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
    253   1544  eschrock 
    254   1544  eschrock 	/*
    255   1544  eschrock 	 * Persistent data errors.
    256   1544  eschrock 	 */
    257   1544  eschrock 	if (!isimport) {
    258   1544  eschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
    259   1544  eschrock 		    &nerr) == 0 && nerr != 0)
    260   1544  eschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
    261    789    ahrens 	}
    262    789    ahrens 
    263    789    ahrens 	/*
    264   1544  eschrock 	 * Missing devices in a replicated config.
    265    789    ahrens 	 */
    266   4451  eschrock 	if (find_vdev_problem(nvroot, vdev_faulted))
    267   4451  eschrock 		return (ZPOOL_STATUS_FAULTED_DEV_R);
    268   1544  eschrock 	if (find_vdev_problem(nvroot, vdev_missing))
    269   1544  eschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
    270   1544  eschrock 	if (find_vdev_problem(nvroot, vdev_broken))
    271   1544  eschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
    272    789    ahrens 
    273    789    ahrens 	/*
    274    789    ahrens 	 * Devices with errors
    275    789    ahrens 	 */
    276    789    ahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
    277    789    ahrens 		return (ZPOOL_STATUS_FAILING_DEV);
    278    789    ahrens 
    279    789    ahrens 	/*
    280    789    ahrens 	 * Offlined devices
    281    789    ahrens 	 */
    282    789    ahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
    283    789    ahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
    284    789    ahrens 
    285    789    ahrens 	/*
    286  10151    George 	 * Removed device
    287  10151    George 	 */
    288  10151    George 	if (find_vdev_problem(nvroot, vdev_removed))
    289  10151    George 		return (ZPOOL_STATUS_REMOVED_DEV);
    290  10151    George 
    291  10151    George 	/*
    292    789    ahrens 	 * Currently resilvering
    293    789    ahrens 	 */
    294    789    ahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
    295    789    ahrens 		return (ZPOOL_STATUS_RESILVERING);
    296    789    ahrens 
    297    789    ahrens 	/*
    298   1760  eschrock 	 * Outdated, but usable, version
    299    789    ahrens 	 */
    300   4577    ahrens 	if (version < SPA_VERSION)
    301   1760  eschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
    302    789    ahrens 
    303    789    ahrens 	return (ZPOOL_STATUS_OK);
    304    789    ahrens }
    305    789    ahrens 
    306    789    ahrens zpool_status_t
    307    789    ahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
    308    789    ahrens {
    309   7754      Jeff 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
    310    789    ahrens 
    311    789    ahrens 	if (ret >= NMSGID)
    312    789    ahrens 		*msgid = NULL;
    313    789    ahrens 	else
    314   4451  eschrock 		*msgid = zfs_msgid_table[ret];
    315    789    ahrens 
    316    789    ahrens 	return (ret);
    317    789    ahrens }
    318    789    ahrens 
    319    789    ahrens zpool_status_t
    320    789    ahrens zpool_import_status(nvlist_t *config, char **msgid)
    321    789    ahrens {
    322   7754      Jeff 	zpool_status_t ret = check_status(config, B_TRUE);
    323    789    ahrens 
    324    789    ahrens 	if (ret >= NMSGID)
    325    789    ahrens 		*msgid = NULL;
    326    789    ahrens 	else
    327   3975  ek110237 		*msgid = zfs_msgid_table[ret];
    328    789    ahrens 
    329    789    ahrens 	return (ret);
    330    789    ahrens }
    331  11149    George 
    332  11149    George static void
    333  11149    George dump_ddt_stat(const ddt_stat_t *dds, int h)
    334  11149    George {
    335  11149    George 	char refcnt[6];
    336  11149    George 	char blocks[6], lsize[6], psize[6], dsize[6];
    337  11149    George 	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
    338  11149    George 
    339  11149    George 	if (dds == NULL || dds->dds_blocks == 0)
    340  11149    George 		return;
    341  11149    George 
    342  11149    George 	if (h == -1)
    343  11149    George 		(void) strcpy(refcnt, "Total");
    344  11149    George 	else
    345  11149    George 		zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
    346  11149    George 
    347  11149    George 	zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
    348  11149    George 	zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
    349  11149    George 	zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
    350  11149    George 	zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
    351  11149    George 	zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
    352  11149    George 	zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
    353  11149    George 	zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
    354  11149    George 	zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
    355  11149    George 
    356  11149    George 	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
    357  11149    George 	    refcnt,
    358  11149    George 	    blocks, lsize, psize, dsize,
    359  11149    George 	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
    360  11149    George }
    361  11149    George 
    362  11149    George /*
    363  11149    George  * Print the DDT histogram and the column totals.
    364  11149    George  */
    365  11149    George void
    366  11149    George zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
    367  11149    George {
    368  11149    George 	int h;
    369  11149    George 
    370  11149    George 	(void) printf("\n");
    371  11149    George 
    372  11149    George 	(void) printf("bucket   "
    373  11149    George 	    "           allocated             "
    374  11149    George 	    "          referenced          \n");
    375  11149    George 	(void) printf("______   "
    376  11149    George 	    "______________________________   "
    377  11149    George 	    "______________________________\n");
    378  11149    George 
    379  11149    George 	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
    380  11149    George 	    "refcnt",
    381  11149    George 	    "blocks", "LSIZE", "PSIZE", "DSIZE",
    382  11149    George 	    "blocks", "LSIZE", "PSIZE", "DSIZE");
    383  11149    George 
    384  11149    George 	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
    385  11149    George 	    "------",
    386  11149    George 	    "------", "-----", "-----", "-----",
    387  11149    George 	    "------", "-----", "-----", "-----");
    388  11149    George 
    389  11149    George 	for (h = 0; h < 64; h++)
    390  11149    George 		dump_ddt_stat(&ddh->ddh_stat[h], h);
    391  11149    George 
    392  11149    George 	dump_ddt_stat(dds_total, -1);
    393  11149    George 
    394  11149    George 	(void) printf("\n");
    395  11149    George }
    396