1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 10151 George * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 24 789 ahrens */ 25 789 ahrens 26 789 ahrens /* 27 789 ahrens * This file contains the functions which analyze the status of a pool. This 28 789 ahrens * include both the status of an active pool, as well as the status exported 29 789 ahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 30 789 ahrens * the pool. This status is independent (to a certain degree) from the state of 31 4451 eschrock * the pool. A pool's state describes only whether or not it is capable of 32 789 ahrens * providing the necessary fault tolerance for data. The status describes the 33 789 ahrens * overall status of devices. A pool that is online can still have a device 34 789 ahrens * that is experiencing errors. 35 789 ahrens * 36 789 ahrens * Only a subset of the possible faults can be detected using 'zpool status', 37 789 ahrens * and not all possible errors correspond to a FMA message ID. The explanation 38 789 ahrens * is left up to the caller, depending on whether it is a live pool or an 39 789 ahrens * import. 40 789 ahrens */ 41 789 ahrens 42 789 ahrens #include <libzfs.h> 43 789 ahrens #include <string.h> 44 3975 ek110237 #include <unistd.h> 45 789 ahrens #include "libzfs_impl.h" 46 789 ahrens 47 789 ahrens /* 48 4451 eschrock * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 49 789 ahrens * in libzfs.h. Note that there are some status results which go past the end 50 789 ahrens * of this table, and hence have no associated message ID. 51 789 ahrens */ 52 3975 ek110237 static char *zfs_msgid_table[] = { 53 789 ahrens "ZFS-8000-14", 54 789 ahrens "ZFS-8000-2Q", 55 789 ahrens "ZFS-8000-3C", 56 789 ahrens "ZFS-8000-4J", 57 789 ahrens "ZFS-8000-5E", 58 789 ahrens "ZFS-8000-6X", 59 789 ahrens "ZFS-8000-72", 60 789 ahrens "ZFS-8000-8A", 61 789 ahrens "ZFS-8000-9P", 62 3975 ek110237 "ZFS-8000-A5", 63 6523 ek110237 "ZFS-8000-EY", 64 6523 ek110237 "ZFS-8000-HC", 65 7294 perrin "ZFS-8000-JQ", 66 7294 perrin "ZFS-8000-K4", 67 1544 eschrock }; 68 1544 eschrock 69 3975 ek110237 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 70 789 ahrens 71 789 ahrens /* ARGSUSED */ 72 789 ahrens static int 73 789 ahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 74 789 ahrens { 75 789 ahrens return (state == VDEV_STATE_CANT_OPEN && 76 789 ahrens aux == VDEV_AUX_OPEN_FAILED); 77 789 ahrens } 78 789 ahrens 79 789 ahrens /* ARGSUSED */ 80 789 ahrens static int 81 4451 eschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 82 4451 eschrock { 83 4451 eschrock return (state == VDEV_STATE_FAULTED); 84 4451 eschrock } 85 4451 eschrock 86 4451 eschrock /* ARGSUSED */ 87 4451 eschrock static int 88 789 ahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 89 789 ahrens { 90 4451 eschrock return (state == VDEV_STATE_DEGRADED || errs != 0); 91 789 ahrens } 92 789 ahrens 93 789 ahrens /* ARGSUSED */ 94 789 ahrens static int 95 789 ahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 96 789 ahrens { 97 789 ahrens return (state == VDEV_STATE_CANT_OPEN); 98 789 ahrens } 99 789 ahrens 100 789 ahrens /* ARGSUSED */ 101 789 ahrens static int 102 789 ahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 103 789 ahrens { 104 789 ahrens return (state == VDEV_STATE_OFFLINE); 105 10151 George } 106 10151 George 107 10151 George /* ARGSUSED */ 108 10151 George static int 109 10151 George vdev_removed(uint64_t state, uint64_t aux, uint64_t errs) 110 10151 George { 111 10151 George return (state == VDEV_STATE_REMOVED); 112 789 ahrens } 113 789 ahrens 114 789 ahrens /* 115 789 ahrens * Detect if any leaf devices that have seen errors or could not be opened. 116 789 ahrens */ 117 2082 eschrock static boolean_t 118 789 ahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 119 789 ahrens { 120 789 ahrens nvlist_t **child; 121 789 ahrens vdev_stat_t *vs; 122 789 ahrens uint_t c, children; 123 789 ahrens char *type; 124 789 ahrens 125 789 ahrens /* 126 789 ahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 127 789 ahrens * the process of repairing any such errors, and don't want to call them 128 789 ahrens * out again. We'll pick up the fact that a resilver is happening 129 789 ahrens * later. 130 789 ahrens */ 131 789 ahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 132 789 ahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 133 2082 eschrock return (B_FALSE); 134 789 ahrens 135 789 ahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 136 789 ahrens &children) == 0) { 137 789 ahrens for (c = 0; c < children; c++) 138 789 ahrens if (find_vdev_problem(child[c], func)) 139 2082 eschrock return (B_TRUE); 140 789 ahrens } else { 141 789 ahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 142 789 ahrens (uint64_t **)&vs, &c) == 0); 143 789 ahrens 144 789 ahrens if (func(vs->vs_state, vs->vs_aux, 145 789 ahrens vs->vs_read_errors + 146 789 ahrens vs->vs_write_errors + 147 789 ahrens vs->vs_checksum_errors)) 148 2082 eschrock return (B_TRUE); 149 789 ahrens } 150 789 ahrens 151 2082 eschrock return (B_FALSE); 152 789 ahrens } 153 789 ahrens 154 789 ahrens /* 155 789 ahrens * Active pool health status. 156 789 ahrens * 157 789 ahrens * To determine the status for a pool, we make several passes over the config, 158 789 ahrens * picking the most egregious error we find. In order of importance, we do the 159 789 ahrens * following: 160 789 ahrens * 161 789 ahrens * - Check for a complete and valid configuration 162 4451 eschrock * - Look for any faulted or missing devices in a non-replicated config 163 1544 eschrock * - Check for any data errors 164 4451 eschrock * - Check for any faulted or missing devices in a replicated config 165 789 ahrens * - Look for any devices showing errors 166 789 ahrens * - Check for any resilvering devices 167 789 ahrens * 168 789 ahrens * There can obviously be multiple errors within a single pool, so this routine 169 789 ahrens * only picks the most damaging of all the current errors to report. 170 789 ahrens */ 171 789 ahrens static zpool_status_t 172 7754 Jeff check_status(nvlist_t *config, boolean_t isimport) 173 789 ahrens { 174 789 ahrens nvlist_t *nvroot; 175 789 ahrens vdev_stat_t *vs; 176 789 ahrens uint_t vsc; 177 1544 eschrock uint64_t nerr; 178 1760 eschrock uint64_t version; 179 3975 ek110237 uint64_t stateval; 180 7754 Jeff uint64_t suspended; 181 3975 ek110237 uint64_t hostid = 0; 182 789 ahrens 183 1760 eschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 184 1760 eschrock &version) == 0); 185 789 ahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 186 789 ahrens &nvroot) == 0); 187 789 ahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 188 789 ahrens (uint64_t **)&vs, &vsc) == 0); 189 3975 ek110237 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 190 3975 ek110237 &stateval) == 0); 191 3975 ek110237 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 192 3975 ek110237 193 3975 ek110237 /* 194 3975 ek110237 * Pool last accessed by another system. 195 3975 ek110237 */ 196 3975 ek110237 if (hostid != 0 && (unsigned long)hostid != gethostid() && 197 3975 ek110237 stateval == POOL_STATE_ACTIVE) 198 3975 ek110237 return (ZPOOL_STATUS_HOSTID_MISMATCH); 199 1760 eschrock 200 1760 eschrock /* 201 1760 eschrock * Newer on-disk version. 202 1760 eschrock */ 203 1760 eschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 204 1760 eschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 205 1760 eschrock return (ZPOOL_STATUS_VERSION_NEWER); 206 789 ahrens 207 789 ahrens /* 208 789 ahrens * Check that the config is complete. 209 789 ahrens */ 210 789 ahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 211 1544 eschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 212 789 ahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 213 6523 ek110237 214 6523 ek110237 /* 215 7754 Jeff * Check whether the pool has suspended due to failed I/O. 216 6523 ek110237 */ 217 7754 Jeff if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, 218 7754 Jeff &suspended) == 0) { 219 7754 Jeff if (suspended == ZIO_FAILURE_MODE_CONTINUE) 220 6523 ek110237 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 221 7754 Jeff return (ZPOOL_STATUS_IO_FAILURE_WAIT); 222 6523 ek110237 } 223 1544 eschrock 224 1544 eschrock /* 225 7294 perrin * Could not read a log. 226 7294 perrin */ 227 7294 perrin if (vs->vs_state == VDEV_STATE_CANT_OPEN && 228 7294 perrin vs->vs_aux == VDEV_AUX_BAD_LOG) { 229 7294 perrin return (ZPOOL_STATUS_BAD_LOG); 230 7294 perrin } 231 7294 perrin 232 7294 perrin /* 233 4451 eschrock * Bad devices in non-replicated config. 234 1544 eschrock */ 235 4451 eschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 236 4451 eschrock find_vdev_problem(nvroot, vdev_faulted)) 237 4451 eschrock return (ZPOOL_STATUS_FAULTED_DEV_NR); 238 4451 eschrock 239 1544 eschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 240 1544 eschrock find_vdev_problem(nvroot, vdev_missing)) 241 1544 eschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 242 1544 eschrock 243 1544 eschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 244 1544 eschrock find_vdev_problem(nvroot, vdev_broken)) 245 1544 eschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 246 1544 eschrock 247 1544 eschrock /* 248 1544 eschrock * Corrupted pool metadata 249 1544 eschrock */ 250 1544 eschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 251 1544 eschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 252 1544 eschrock return (ZPOOL_STATUS_CORRUPT_POOL); 253 1544 eschrock 254 1544 eschrock /* 255 1544 eschrock * Persistent data errors. 256 1544 eschrock */ 257 1544 eschrock if (!isimport) { 258 1544 eschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 259 1544 eschrock &nerr) == 0 && nerr != 0) 260 1544 eschrock return (ZPOOL_STATUS_CORRUPT_DATA); 261 789 ahrens } 262 789 ahrens 263 789 ahrens /* 264 1544 eschrock * Missing devices in a replicated config. 265 789 ahrens */ 266 4451 eschrock if (find_vdev_problem(nvroot, vdev_faulted)) 267 4451 eschrock return (ZPOOL_STATUS_FAULTED_DEV_R); 268 1544 eschrock if (find_vdev_problem(nvroot, vdev_missing)) 269 1544 eschrock return (ZPOOL_STATUS_MISSING_DEV_R); 270 1544 eschrock if (find_vdev_problem(nvroot, vdev_broken)) 271 1544 eschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 272 789 ahrens 273 789 ahrens /* 274 789 ahrens * Devices with errors 275 789 ahrens */ 276 789 ahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 277 789 ahrens return (ZPOOL_STATUS_FAILING_DEV); 278 789 ahrens 279 789 ahrens /* 280 789 ahrens * Offlined devices 281 789 ahrens */ 282 789 ahrens if (find_vdev_problem(nvroot, vdev_offlined)) 283 789 ahrens return (ZPOOL_STATUS_OFFLINE_DEV); 284 789 ahrens 285 789 ahrens /* 286 10151 George * Removed device 287 10151 George */ 288 10151 George if (find_vdev_problem(nvroot, vdev_removed)) 289 10151 George return (ZPOOL_STATUS_REMOVED_DEV); 290 10151 George 291 10151 George /* 292 789 ahrens * Currently resilvering 293 789 ahrens */ 294 789 ahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 295 789 ahrens return (ZPOOL_STATUS_RESILVERING); 296 789 ahrens 297 789 ahrens /* 298 1760 eschrock * Outdated, but usable, version 299 789 ahrens */ 300 4577 ahrens if (version < SPA_VERSION) 301 1760 eschrock return (ZPOOL_STATUS_VERSION_OLDER); 302 789 ahrens 303 789 ahrens return (ZPOOL_STATUS_OK); 304 789 ahrens } 305 789 ahrens 306 789 ahrens zpool_status_t 307 789 ahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 308 789 ahrens { 309 7754 Jeff zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 310 789 ahrens 311 789 ahrens if (ret >= NMSGID) 312 789 ahrens *msgid = NULL; 313 789 ahrens else 314 4451 eschrock *msgid = zfs_msgid_table[ret]; 315 789 ahrens 316 789 ahrens return (ret); 317 789 ahrens } 318 789 ahrens 319 789 ahrens zpool_status_t 320 789 ahrens zpool_import_status(nvlist_t *config, char **msgid) 321 789 ahrens { 322 7754 Jeff zpool_status_t ret = check_status(config, B_TRUE); 323 789 ahrens 324 789 ahrens if (ret >= NMSGID) 325 789 ahrens *msgid = NULL; 326 789 ahrens else 327 3975 ek110237 *msgid = zfs_msgid_table[ret]; 328 789 ahrens 329 789 ahrens return (ret); 330 789 ahrens } 331 11149 George 332 11149 George static void 333 11149 George dump_ddt_stat(const ddt_stat_t *dds, int h) 334 11149 George { 335 11149 George char refcnt[6]; 336 11149 George char blocks[6], lsize[6], psize[6], dsize[6]; 337 11149 George char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; 338 11149 George 339 11149 George if (dds == NULL || dds->dds_blocks == 0) 340 11149 George return; 341 11149 George 342 11149 George if (h == -1) 343 11149 George (void) strcpy(refcnt, "Total"); 344 11149 George else 345 11149 George zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); 346 11149 George 347 11149 George zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); 348 11149 George zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); 349 11149 George zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); 350 11149 George zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); 351 11149 George zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); 352 11149 George zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); 353 11149 George zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); 354 11149 George zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); 355 11149 George 356 11149 George (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 357 11149 George refcnt, 358 11149 George blocks, lsize, psize, dsize, 359 11149 George ref_blocks, ref_lsize, ref_psize, ref_dsize); 360 11149 George } 361 11149 George 362 11149 George /* 363 11149 George * Print the DDT histogram and the column totals. 364 11149 George */ 365 11149 George void 366 11149 George zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) 367 11149 George { 368 11149 George int h; 369 11149 George 370 11149 George (void) printf("\n"); 371 11149 George 372 11149 George (void) printf("bucket " 373 11149 George " allocated " 374 11149 George " referenced \n"); 375 11149 George (void) printf("______ " 376 11149 George "______________________________ " 377 11149 George "______________________________\n"); 378 11149 George 379 11149 George (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 380 11149 George "refcnt", 381 11149 George "blocks", "LSIZE", "PSIZE", "DSIZE", 382 11149 George "blocks", "LSIZE", "PSIZE", "DSIZE"); 383 11149 George 384 11149 George (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 385 11149 George "------", 386 11149 George "------", "-----", "-----", "-----", 387 11149 George "------", "-----", "-----", "-----"); 388 11149 George 389 11149 George for (h = 0; h < 64; h++) 390 11149 George dump_ddt_stat(&ddh->ddh_stat[h], h); 391 11149 George 392 11149 George dump_ddt_stat(dds_total, -1); 393 11149 George 394 11149 George (void) printf("\n"); 395 11149 George } 396