Home | History | Annotate | Download | only in zfs
      1   1544  eschrock /*
      2   1544  eschrock  * CDDL HEADER START
      3   1544  eschrock  *
      4   1544  eschrock  * The contents of this file are subject to the terms of the
      5   1544  eschrock  * Common Development and Distribution License (the "License").
      6   1544  eschrock  * You may not use this file except in compliance with the License.
      7   1544  eschrock  *
      8   1544  eschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   1544  eschrock  * or http://www.opensolaris.org/os/licensing.
     10   1544  eschrock  * See the License for the specific language governing permissions
     11   1544  eschrock  * and limitations under the License.
     12   1544  eschrock  *
     13   1544  eschrock  * When distributing Covered Code, include this CDDL HEADER in each
     14   1544  eschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   1544  eschrock  * If applicable, add the following below this CDDL HEADER, with the
     16   1544  eschrock  * fields enclosed by brackets "[]" replaced with your own identifying
     17   1544  eschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   1544  eschrock  *
     19   1544  eschrock  * CDDL HEADER END
     20   1544  eschrock  */
     21   1544  eschrock /*
     22   9425      Eric  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23   1544  eschrock  * Use is subject to license terms.
     24   1544  eschrock  */
     25   1544  eschrock 
     26   1544  eschrock #include <sys/spa.h>
     27   1544  eschrock #include <sys/spa_impl.h>
     28   1544  eschrock #include <sys/vdev.h>
     29   1544  eschrock #include <sys/vdev_impl.h>
     30   1544  eschrock #include <sys/zio.h>
     31  10614  Jonathan #include <sys/zio_checksum.h>
     32   1544  eschrock 
     33   1544  eschrock #include <sys/fm/fs/zfs.h>
     34   1544  eschrock #include <sys/fm/protocol.h>
     35   1544  eschrock #include <sys/fm/util.h>
     36   1544  eschrock #include <sys/sysevent.h>
     37   1544  eschrock 
     38   1544  eschrock /*
     39   1544  eschrock  * This general routine is responsible for generating all the different ZFS
     40   1544  eschrock  * ereports.  The payload is dependent on the class, and which arguments are
     41   1544  eschrock  * supplied to the function:
     42   1544  eschrock  *
     43   1544  eschrock  * 	EREPORT			POOL	VDEV	IO
     44   1544  eschrock  * 	block			X	X	X
     45   1544  eschrock  * 	data			X		X
     46   1544  eschrock  * 	device			X	X
     47   1544  eschrock  * 	pool			X
     48   1544  eschrock  *
     49   1544  eschrock  * If we are in a loading state, all errors are chained together by the same
     50   6523  ek110237  * SPA-wide ENA (Error Numeric Association).
     51   1544  eschrock  *
     52   1544  eschrock  * For isolated I/O requests, we get the ENA from the zio_t. The propagation
     53   1544  eschrock  * gets very complicated due to RAID-Z, gang blocks, and vdev caching.  We want
     54   1544  eschrock  * to chain together all ereports associated with a logical piece of data.  For
      55   1544  eschrock  * read I/Os, there are basically three 'types' of I/O, which form a roughly
     56   1544  eschrock  * layered diagram:
     57   1544  eschrock  *
     58   1544  eschrock  *      +---------------+
     59   1544  eschrock  * 	| Aggregate I/O |	No associated logical data or device
     60   1544  eschrock  * 	+---------------+
     61   1544  eschrock  *              |
     62   1544  eschrock  *              V
     63   1544  eschrock  * 	+---------------+	Reads associated with a piece of logical data.
     64   1544  eschrock  * 	|   Read I/O    |	This includes reads on behalf of RAID-Z,
     65   1544  eschrock  * 	+---------------+       mirrors, gang blocks, retries, etc.
     66   1544  eschrock  *              |
     67   1544  eschrock  *              V
     68   1544  eschrock  * 	+---------------+	Reads associated with a particular device, but
     69   1544  eschrock  * 	| Physical I/O  |	no logical data.  Issued as part of vdev caching
     70   1544  eschrock  * 	+---------------+	and I/O aggregation.
     71   1544  eschrock  *
     72   1544  eschrock  * Note that 'physical I/O' here is not the same terminology as used in the rest
     73   1544  eschrock  * of ZIO.  Typically, 'physical I/O' simply means that there is no attached
     74   1544  eschrock  * blockpointer.  But I/O with no associated block pointer can still be related
     75   1544  eschrock  * to a logical piece of data (i.e. RAID-Z requests).
     76   1544  eschrock  *
     77   1544  eschrock  * Purely physical I/O always have unique ENAs.  They are not related to a
     78   1544  eschrock  * particular piece of logical data, and therefore cannot be chained together.
     79   1544  eschrock  * We still generate an ereport, but the DE doesn't correlate it with any
     80   1544  eschrock  * logical piece of data.  When such an I/O fails, the delegated I/O requests
     81   1544  eschrock  * will issue a retry, which will trigger the 'real' ereport with the correct
     82   1544  eschrock  * ENA.
     83   1544  eschrock  *
     84   1544  eschrock  * We keep track of the ENA for a ZIO chain through the 'io_logical' member.
     85   1544  eschrock  * When a new logical I/O is issued, we set this to point to itself.  Child I/Os
     86   1544  eschrock  * then inherit this pointer, so that when it is first set subsequent failures
     87   7754      Jeff  * will use the same ENA.  For vdev cache fill and queue aggregation I/O,
     88   7754      Jeff  * this pointer is set to NULL, and no ereport will be generated (since it
     89   7754      Jeff  * doesn't actually correspond to any particular device or piece of data,
     90   7754      Jeff  * and the caller will always retry without caching or queueing anyway).
     91  10614  Jonathan  *
     92  10614  Jonathan  * For checksum errors, we want to include more information about the actual
     93  10614  Jonathan  * error which occurs.  Accordingly, we build an ereport when the error is
     94  10614  Jonathan  * noticed, but instead of sending it in immediately, we hang it off of the
     95  10614  Jonathan  * io_cksum_report field of the logical IO.  When the logical IO completes
     96  10614  Jonathan  * (successfully or not), zfs_ereport_finish_checksum() is called with the
     97  10614  Jonathan  * good and bad versions of the buffer (if available), and we annotate the
     98  10614  Jonathan  * ereport with information about the differences.
     99   1544  eschrock  */
    100  10614  Jonathan #ifdef _KERNEL
    101  10614  Jonathan static void
    102  10614  Jonathan zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
    103  10614  Jonathan     const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
    104   1544  eschrock     uint64_t stateoroffset, uint64_t size)
    105   1544  eschrock {
    106   1544  eschrock 	nvlist_t *ereport, *detector;
    107  10614  Jonathan 
    108   1544  eschrock 	uint64_t ena;
    109   1544  eschrock 	char class[64];
    110   1544  eschrock 
    111   1544  eschrock 	/*
    112  10921       Tim 	 * If we are doing a spa_tryimport() or in recovery mode,
    113  10921       Tim 	 * ignore errors.
    114   1544  eschrock 	 */
    115  11147    George 	if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT ||
    116  11147    George 	    spa_load_state(spa) == SPA_LOAD_RECOVER)
    117   1544  eschrock 		return;
    118   1544  eschrock 
    119   1544  eschrock 	/*
    120   1544  eschrock 	 * If we are in the middle of opening a pool, and the previous attempt
    121   1544  eschrock 	 * failed, don't bother logging any new ereports - we're just going to
    122   1544  eschrock 	 * get the same diagnosis anyway.
    123   1544  eschrock 	 */
    124  11147    George 	if (spa_load_state(spa) != SPA_LOAD_NONE &&
    125   1544  eschrock 	    spa->spa_last_open_failed)
    126   1544  eschrock 		return;
    127   1544  eschrock 
    128   6673  eschrock 	if (zio != NULL) {
    129   6673  eschrock 		/*
    130   6673  eschrock 		 * If this is not a read or write zio, ignore the error.  This
    131   6673  eschrock 		 * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
    132   6673  eschrock 		 */
    133   6673  eschrock 		if (zio->io_type != ZIO_TYPE_READ &&
    134   6673  eschrock 		    zio->io_type != ZIO_TYPE_WRITE)
    135   6673  eschrock 			return;
    136   6673  eschrock 
    137   6673  eschrock 		/*
    138   6673  eschrock 		 * Ignore any errors from speculative I/Os, as failure is an
    139   6673  eschrock 		 * expected result.
    140   6673  eschrock 		 */
    141   6673  eschrock 		if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
    142   9725      Eric 			return;
    143   9725      Eric 
    144   9725      Eric 		/*
    145   9725      Eric 		 * If this I/O is not a retry I/O, don't post an ereport.
    146   9725      Eric 		 * Otherwise, we risk making bad diagnoses based on B_FAILFAST
    147   9725      Eric 		 * I/Os.
    148   9725      Eric 		 */
    149   9725      Eric 		if (zio->io_error == EIO &&
    150   9725      Eric 		    !(zio->io_flags & ZIO_FLAG_IO_RETRY))
    151   6976  eschrock 			return;
    152   6976  eschrock 
    153   9425      Eric 		if (vd != NULL) {
    154   9425      Eric 			/*
    155   9425      Eric 			 * If the vdev has already been marked as failing due
    156   9425      Eric 			 * to a failed probe, then ignore any subsequent I/O
    157   9425      Eric 			 * errors, as the DE will automatically fault the vdev
    158   9425      Eric 			 * on the first such failure.  This also catches cases
    159   9425      Eric 			 * where vdev_remove_wanted is set and the device has
    160   9425      Eric 			 * not yet been asynchronously placed into the REMOVED
    161   9425      Eric 			 * state.
    162   9425      Eric 			 */
    163  10575      Eric 			if (zio->io_vd == vd && !vdev_accessible(vd, zio))
    164   9425      Eric 				return;
    165   9425      Eric 
    166   9425      Eric 			/*
    167   9425      Eric 			 * Ignore checksum errors for reads from DTL regions of
    168   9425      Eric 			 * leaf vdevs.
    169   9425      Eric 			 */
    170   9425      Eric 			if (zio->io_type == ZIO_TYPE_READ &&
    171   9425      Eric 			    zio->io_error == ECKSUM &&
    172   9425      Eric 			    vd->vdev_ops->vdev_op_leaf &&
    173   9425      Eric 			    vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1))
    174   9425      Eric 				return;
    175   9425      Eric 		}
    176   6673  eschrock 	}
    177  10575      Eric 
    178  10575      Eric 	/*
    179  10575      Eric 	 * For probe failure, we want to avoid posting ereports if we've
    180  10575      Eric 	 * already removed the device in the meantime.
    181  10575      Eric 	 */
    182  10575      Eric 	if (vd != NULL &&
    183  10575      Eric 	    strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 &&
    184  10575      Eric 	    (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED))
    185  10575      Eric 		return;
    186   1544  eschrock 
    187   1544  eschrock 	if ((ereport = fm_nvlist_create(NULL)) == NULL)
    188   1544  eschrock 		return;
    189   1544  eschrock 
    190   1544  eschrock 	if ((detector = fm_nvlist_create(NULL)) == NULL) {
    191   1544  eschrock 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
    192   1544  eschrock 		return;
    193   1544  eschrock 	}
    194   1544  eschrock 
    195   1544  eschrock 	/*
    196   1544  eschrock 	 * Serialize ereport generation
    197   1544  eschrock 	 */
    198   1544  eschrock 	mutex_enter(&spa->spa_errlist_lock);
    199   1544  eschrock 
    200   1544  eschrock 	/*
    201   1544  eschrock 	 * Determine the ENA to use for this event.  If we are in a loading
    202   1544  eschrock 	 * state, use a SPA-wide ENA.  Otherwise, if we are in an I/O state, use
    203   1544  eschrock 	 * a root zio-wide ENA.  Otherwise, simply use a unique ENA.
    204   1544  eschrock 	 */
    205  11147    George 	if (spa_load_state(spa) != SPA_LOAD_NONE) {
    206   1544  eschrock 		if (spa->spa_ena == 0)
    207   1544  eschrock 			spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1);
    208   1544  eschrock 		ena = spa->spa_ena;
    209   1544  eschrock 	} else if (zio != NULL && zio->io_logical != NULL) {
    210   1544  eschrock 		if (zio->io_logical->io_ena == 0)
    211   1544  eschrock 			zio->io_logical->io_ena =
    212   1544  eschrock 			    fm_ena_generate(0, FM_ENA_FMT1);
    213   1544  eschrock 		ena = zio->io_logical->io_ena;
    214   1544  eschrock 	} else {
    215   1544  eschrock 		ena = fm_ena_generate(0, FM_ENA_FMT1);
    216   1544  eschrock 	}
    217   1544  eschrock 
    218   1544  eschrock 	/*
    219   1544  eschrock 	 * Construct the full class, detector, and other standard FMA fields.
    220   1544  eschrock 	 */
    221   1544  eschrock 	(void) snprintf(class, sizeof (class), "%s.%s",
    222   1544  eschrock 	    ZFS_ERROR_CLASS, subclass);
    223   1544  eschrock 
    224   1544  eschrock 	fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa),
    225   1544  eschrock 	    vd != NULL ? vd->vdev_guid : 0);
    226   1544  eschrock 
    227   1544  eschrock 	fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL);
    228   1544  eschrock 
    229   1544  eschrock 	/*
    230   1544  eschrock 	 * Construct the per-ereport payload, depending on which parameters are
    231   1544  eschrock 	 * passed in.
    232   1544  eschrock 	 */
    233   1544  eschrock 
    234   1544  eschrock 	/*
    235   1544  eschrock 	 * Generic payload members common to all ereports.
    236   1544  eschrock 	 */
    237   1544  eschrock 	fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL,
    238   7754      Jeff 	    DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
    239   1544  eschrock 	    DATA_TYPE_UINT64, spa_guid(spa),
    240   1544  eschrock 	    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
    241  11147    George 	    spa_load_state(spa), NULL);
    242   6523  ek110237 
    243   6523  ek110237 	if (spa != NULL) {
    244   6523  ek110237 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
    245   6523  ek110237 		    DATA_TYPE_STRING,
    246   6523  ek110237 		    spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
    247   6523  ek110237 		    FM_EREPORT_FAILMODE_WAIT :
    248   6523  ek110237 		    spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
    249   6523  ek110237 		    FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
    250   6523  ek110237 		    NULL);
    251   6523  ek110237 	}
    252   1544  eschrock 
    253   1544  eschrock 	if (vd != NULL) {
    254   1544  eschrock 		vdev_t *pvd = vd->vdev_parent;
    255   1544  eschrock 
    256   1544  eschrock 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
    257   1544  eschrock 		    DATA_TYPE_UINT64, vd->vdev_guid,
    258   1544  eschrock 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
    259   1544  eschrock 		    DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL);
    260   9425      Eric 		if (vd->vdev_path != NULL)
    261   1544  eschrock 			fm_payload_set(ereport,
    262   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
    263   1544  eschrock 			    DATA_TYPE_STRING, vd->vdev_path, NULL);
    264   9425      Eric 		if (vd->vdev_devid != NULL)
    265   1544  eschrock 			fm_payload_set(ereport,
    266   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID,
    267   1544  eschrock 			    DATA_TYPE_STRING, vd->vdev_devid, NULL);
    268   9425      Eric 		if (vd->vdev_fru != NULL)
    269   9425      Eric 			fm_payload_set(ereport,
    270   9425      Eric 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU,
    271   9425      Eric 			    DATA_TYPE_STRING, vd->vdev_fru, NULL);
    272   1544  eschrock 
    273   1544  eschrock 		if (pvd != NULL) {
    274   1544  eschrock 			fm_payload_set(ereport,
    275   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID,
    276   1544  eschrock 			    DATA_TYPE_UINT64, pvd->vdev_guid,
    277   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE,
    278   1544  eschrock 			    DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type,
    279   1544  eschrock 			    NULL);
    280   1544  eschrock 			if (pvd->vdev_path)
    281   1544  eschrock 				fm_payload_set(ereport,
    282   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH,
    283   4831   gw25295 				    DATA_TYPE_STRING, pvd->vdev_path, NULL);
    284   1544  eschrock 			if (pvd->vdev_devid)
    285   1544  eschrock 				fm_payload_set(ereport,
    286   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
    287   1544  eschrock 				    DATA_TYPE_STRING, pvd->vdev_devid, NULL);
    288   1544  eschrock 		}
    289   1544  eschrock 	}
    290   1544  eschrock 
    291   1544  eschrock 	if (zio != NULL) {
    292   1544  eschrock 		/*
    293   1544  eschrock 		 * Payload common to all I/Os.
    294   1544  eschrock 		 */
    295   1544  eschrock 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
    296   1544  eschrock 		    DATA_TYPE_INT32, zio->io_error, NULL);
    297   1544  eschrock 
    298   1544  eschrock 		/*
    299   1544  eschrock 		 * If the 'size' parameter is non-zero, it indicates this is a
    300   1544  eschrock 		 * RAID-Z or other I/O where the physical offset and length are
    301   1544  eschrock 		 * provided for us, instead of within the zio_t.
    302   1544  eschrock 		 */
    303   1544  eschrock 		if (vd != NULL) {
    304   1544  eschrock 			if (size)
    305   1544  eschrock 				fm_payload_set(ereport,
    306   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
    307   1544  eschrock 				    DATA_TYPE_UINT64, stateoroffset,
    308   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
    309   1955  eschrock 				    DATA_TYPE_UINT64, size, NULL);
    310   1544  eschrock 			else
    311   1544  eschrock 				fm_payload_set(ereport,
    312   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
    313   1544  eschrock 				    DATA_TYPE_UINT64, zio->io_offset,
    314   1544  eschrock 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
    315   1955  eschrock 				    DATA_TYPE_UINT64, zio->io_size, NULL);
    316   1544  eschrock 		}
    317   1544  eschrock 
    318   1544  eschrock 		/*
    319   1544  eschrock 		 * Payload for I/Os with corresponding logical information.
    320   1544  eschrock 		 */
    321   1544  eschrock 		if (zio->io_logical != NULL)
    322   1544  eschrock 			fm_payload_set(ereport,
    323   6423   gw25295 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
    324   6423   gw25295 			    DATA_TYPE_UINT64,
    325   6423   gw25295 			    zio->io_logical->io_bookmark.zb_objset,
    326   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
    327   1544  eschrock 			    DATA_TYPE_UINT64,
    328   1544  eschrock 			    zio->io_logical->io_bookmark.zb_object,
    329   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
    330   4831   gw25295 			    DATA_TYPE_INT64,
    331   1544  eschrock 			    zio->io_logical->io_bookmark.zb_level,
    332   1544  eschrock 			    FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
    333   1544  eschrock 			    DATA_TYPE_UINT64,
    334   1955  eschrock 			    zio->io_logical->io_bookmark.zb_blkid, NULL);
    335   1544  eschrock 	} else if (vd != NULL) {
    336   1544  eschrock 		/*
    337   1544  eschrock 		 * If we have a vdev but no zio, this is a device fault, and the
    338   1544  eschrock 		 * 'stateoroffset' parameter indicates the previous state of the
    339   1544  eschrock 		 * vdev.
    340   1544  eschrock 		 */
    341   1544  eschrock 		fm_payload_set(ereport,
    342   1544  eschrock 		    FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
    343   1544  eschrock 		    DATA_TYPE_UINT64, stateoroffset, NULL);
    344   1544  eschrock 	}
    345  10921       Tim 
    346   1544  eschrock 	mutex_exit(&spa->spa_errlist_lock);
    347   1544  eschrock 
    348  10614  Jonathan 	*ereport_out = ereport;
    349  10614  Jonathan 	*detector_out = detector;
    350  10614  Jonathan }
    351  10614  Jonathan 
    352  10614  Jonathan /* if it's <= 128 bytes, save the corruption directly */
    353  10614  Jonathan #define	ZFM_MAX_INLINE		(128 / sizeof (uint64_t))
    354  10614  Jonathan 
    355  10614  Jonathan #define	MAX_RANGES		16
    356  10614  Jonathan 
    357  10614  Jonathan typedef struct zfs_ecksum_info {
    358  10614  Jonathan 	/* histograms of set and cleared bits by bit number in a 64-bit word */
    359  10614  Jonathan 	uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY];
    360  10614  Jonathan 	uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
    361  10614  Jonathan 
    362  10614  Jonathan 	/* inline arrays of bits set and cleared. */
    363  10614  Jonathan 	uint64_t zei_bits_set[ZFM_MAX_INLINE];
    364  10614  Jonathan 	uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
    365  10614  Jonathan 
    366  10614  Jonathan 	/*
    367  10614  Jonathan 	 * for each range, the number of bits set and cleared.  The Hamming
    368  10614  Jonathan 	 * distance between the good and bad buffers is the sum of them all.
    369  10614  Jonathan 	 */
    370  10614  Jonathan 	uint32_t zei_range_sets[MAX_RANGES];
    371  10614  Jonathan 	uint32_t zei_range_clears[MAX_RANGES];
    372  10614  Jonathan 
    373  10614  Jonathan 	struct zei_ranges {
    374  10614  Jonathan 		uint32_t	zr_start;
    375  10614  Jonathan 		uint32_t	zr_end;
    376  10614  Jonathan 	} zei_ranges[MAX_RANGES];
    377  10614  Jonathan 
    378  10614  Jonathan 	size_t	zei_range_count;
    379  10614  Jonathan 	uint32_t zei_mingap;
    380  10614  Jonathan 	uint32_t zei_allowed_mingap;
    381  10614  Jonathan 
    382  10614  Jonathan } zfs_ecksum_info_t;
    383  10614  Jonathan 
    384  10614  Jonathan static void
    385  10614  Jonathan update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
    386  10614  Jonathan {
    387  10614  Jonathan 	size_t i;
    388  10614  Jonathan 	size_t bits = 0;
    389  10614  Jonathan 	uint64_t value = BE_64(value_arg);
    390  10614  Jonathan 
    391  10614  Jonathan 	/* We store the bits in big-endian (largest-first) order */
    392  10614  Jonathan 	for (i = 0; i < 64; i++) {
    393  10614  Jonathan 		if (value & (1ull << i)) {
    394  10614  Jonathan 			hist[63 - i]++;
    395  10614  Jonathan 			++bits;
    396  10614  Jonathan 		}
    397  10614  Jonathan 	}
    398  10614  Jonathan 	/* update the count of bits changed */
    399  10614  Jonathan 	*count += bits;
    400  10614  Jonathan }
    401  10614  Jonathan 
    402  10614  Jonathan /*
    403  10614  Jonathan  * We've now filled up the range array, and need to increase "mingap" and
    404  10614  Jonathan  * shrink the range list accordingly.  zei_mingap is always the smallest
    405  10614  Jonathan  * distance between array entries, so we set the new_allowed_gap to be
    406  10614  Jonathan  * one greater than that.  We then go through the list, joining together
    407  10614  Jonathan  * any ranges which are closer than the new_allowed_gap.
    408  10614  Jonathan  *
    409  10614  Jonathan  * By construction, there will be at least one.  We also update zei_mingap
    410  10614  Jonathan  * to the new smallest gap, to prepare for our next invocation.
    411  10614  Jonathan  */
    412  10614  Jonathan static void
    413  10614  Jonathan shrink_ranges(zfs_ecksum_info_t *eip)
    414  10614  Jonathan {
    415  10614  Jonathan 	uint32_t mingap = UINT32_MAX;
    416  10614  Jonathan 	uint32_t new_allowed_gap = eip->zei_mingap + 1;
    417  10614  Jonathan 
    418  10614  Jonathan 	size_t idx, output;
    419  10614  Jonathan 	size_t max = eip->zei_range_count;
    420  10614  Jonathan 
    421  10614  Jonathan 	struct zei_ranges *r = eip->zei_ranges;
    422  10614  Jonathan 
    423  10614  Jonathan 	ASSERT3U(eip->zei_range_count, >, 0);
    424  10614  Jonathan 	ASSERT3U(eip->zei_range_count, <=, MAX_RANGES);
    425  10614  Jonathan 
    426  10614  Jonathan 	output = idx = 0;
    427  10614  Jonathan 	while (idx < max - 1) {
    428  10614  Jonathan 		uint32_t start = r[idx].zr_start;
    429  10614  Jonathan 		uint32_t end = r[idx].zr_end;
    430  10614  Jonathan 
    431  10614  Jonathan 		while (idx < max - 1) {
    432  10614  Jonathan 			idx++;
    433  10614  Jonathan 
    434  10614  Jonathan 			uint32_t nstart = r[idx].zr_start;
    435  10614  Jonathan 			uint32_t nend = r[idx].zr_end;
    436  10614  Jonathan 
    437  10614  Jonathan 			uint32_t gap = nstart - end;
    438  10614  Jonathan 			if (gap < new_allowed_gap) {
    439  10614  Jonathan 				end = nend;
    440  10614  Jonathan 				continue;
    441  10614  Jonathan 			}
    442  10614  Jonathan 			if (gap < mingap)
    443  10614  Jonathan 				mingap = gap;
    444  10614  Jonathan 			break;
    445  10614  Jonathan 		}
    446  10614  Jonathan 		r[output].zr_start = start;
    447  10614  Jonathan 		r[output].zr_end = end;
    448  10614  Jonathan 		output++;
    449  10614  Jonathan 	}
    450  10614  Jonathan 	ASSERT3U(output, <, eip->zei_range_count);
    451  10614  Jonathan 	eip->zei_range_count = output;
    452  10614  Jonathan 	eip->zei_mingap = mingap;
    453  10614  Jonathan 	eip->zei_allowed_mingap = new_allowed_gap;
    454  10614  Jonathan }
    455  10614  Jonathan 
    456  10614  Jonathan static void
    457  10614  Jonathan add_range(zfs_ecksum_info_t *eip, int start, int end)
    458  10614  Jonathan {
    459  10614  Jonathan 	struct zei_ranges *r = eip->zei_ranges;
    460  10614  Jonathan 	size_t count = eip->zei_range_count;
    461  10614  Jonathan 
    462  10614  Jonathan 	if (count >= MAX_RANGES) {
    463  10614  Jonathan 		shrink_ranges(eip);
    464  10614  Jonathan 		count = eip->zei_range_count;
    465  10614  Jonathan 	}
    466  10614  Jonathan 	if (count == 0) {
    467  10614  Jonathan 		eip->zei_mingap = UINT32_MAX;
    468  10614  Jonathan 		eip->zei_allowed_mingap = 1;
    469  10614  Jonathan 	} else {
    470  10614  Jonathan 		int gap = start - r[count - 1].zr_end;
    471  10614  Jonathan 
    472  10614  Jonathan 		if (gap < eip->zei_allowed_mingap) {
    473  10614  Jonathan 			r[count - 1].zr_end = end;
    474  10614  Jonathan 			return;
    475  10614  Jonathan 		}
    476  10614  Jonathan 		if (gap < eip->zei_mingap)
    477  10614  Jonathan 			eip->zei_mingap = gap;
    478  10614  Jonathan 	}
    479  10614  Jonathan 	r[count].zr_start = start;
    480  10614  Jonathan 	r[count].zr_end = end;
    481  10614  Jonathan 	eip->zei_range_count++;
    482  10614  Jonathan }
    483  10614  Jonathan 
    484  10614  Jonathan static size_t
    485  10614  Jonathan range_total_size(zfs_ecksum_info_t *eip)
    486  10614  Jonathan {
    487  10614  Jonathan 	struct zei_ranges *r = eip->zei_ranges;
    488  10614  Jonathan 	size_t count = eip->zei_range_count;
    489  10614  Jonathan 	size_t result = 0;
    490  10614  Jonathan 	size_t idx;
    491  10614  Jonathan 
    492  10614  Jonathan 	for (idx = 0; idx < count; idx++)
    493  10614  Jonathan 		result += (r[idx].zr_end - r[idx].zr_start);
    494  10614  Jonathan 
    495  10614  Jonathan 	return (result);
    496  10614  Jonathan }
    497  10614  Jonathan 
    498  10614  Jonathan static zfs_ecksum_info_t *
    499  10614  Jonathan annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
    500  10614  Jonathan     const uint8_t *goodbuf, const uint8_t *badbuf, size_t size,
    501  10614  Jonathan     boolean_t drop_if_identical)
    502  10614  Jonathan {
    503  10614  Jonathan 	const uint64_t *good = (const uint64_t *)goodbuf;
    504  10614  Jonathan 	const uint64_t *bad = (const uint64_t *)badbuf;
    505  10614  Jonathan 
    506  10614  Jonathan 	uint64_t allset = 0;
    507  10614  Jonathan 	uint64_t allcleared = 0;
    508  10614  Jonathan 
    509  10614  Jonathan 	size_t nui64s = size / sizeof (uint64_t);
    510  10614  Jonathan 
    511  10614  Jonathan 	size_t inline_size;
    512  10614  Jonathan 	int no_inline = 0;
    513  10614  Jonathan 	size_t idx;
    514  10614  Jonathan 	size_t range;
    515  10614  Jonathan 
    516  10614  Jonathan 	size_t offset = 0;
    517  10614  Jonathan 	ssize_t start = -1;
    518  10614  Jonathan 
    519  10614  Jonathan 	zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP);
    520  10614  Jonathan 
    521  10614  Jonathan 	/* don't do any annotation for injected checksum errors */
    522  10614  Jonathan 	if (info != NULL && info->zbc_injected)
    523  10614  Jonathan 		return (eip);
    524  10614  Jonathan 
    525  10614  Jonathan 	if (info != NULL && info->zbc_has_cksum) {
    526  10614  Jonathan 		fm_payload_set(ereport,
    527  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
    528  10614  Jonathan 		    DATA_TYPE_UINT64_ARRAY,
    529  10614  Jonathan 		    sizeof (info->zbc_expected) / sizeof (uint64_t),
    530  10614  Jonathan 		    (uint64_t *)&info->zbc_expected,
    531  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
    532  10614  Jonathan 		    DATA_TYPE_UINT64_ARRAY,
    533  10614  Jonathan 		    sizeof (info->zbc_actual) / sizeof (uint64_t),
    534  10614  Jonathan 		    (uint64_t *)&info->zbc_actual,
    535  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
    536  10614  Jonathan 		    DATA_TYPE_STRING,
    537  10614  Jonathan 		    info->zbc_checksum_name,
    538  10614  Jonathan 		    NULL);
    539  10614  Jonathan 
    540  10614  Jonathan 		if (info->zbc_byteswapped) {
    541  10614  Jonathan 			fm_payload_set(ereport,
    542  10614  Jonathan 			    FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP,
    543  10614  Jonathan 			    DATA_TYPE_BOOLEAN, 1,
    544  10614  Jonathan 			    NULL);
    545  10614  Jonathan 		}
    546  10614  Jonathan 	}
    547  10614  Jonathan 
    548  10614  Jonathan 	if (badbuf == NULL || goodbuf == NULL)
    549  10614  Jonathan 		return (eip);
    550  10614  Jonathan 
    551  10614  Jonathan 	ASSERT3U(nui64s, <=, UINT16_MAX);
    552  10614  Jonathan 	ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
    553  10614  Jonathan 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
    554  10614  Jonathan 	ASSERT3U(size, <=, UINT32_MAX);
    555  10614  Jonathan 
    556  10614  Jonathan 	/* build up the range list by comparing the two buffers. */
    557  10614  Jonathan 	for (idx = 0; idx < nui64s; idx++) {
    558  10614  Jonathan 		if (good[idx] == bad[idx]) {
    559  10614  Jonathan 			if (start == -1)
    560  10614  Jonathan 				continue;
    561  10614  Jonathan 
    562  10614  Jonathan 			add_range(eip, start, idx);
    563  10614  Jonathan 			start = -1;
    564  10614  Jonathan 		} else {
    565  10614  Jonathan 			if (start != -1)
    566  10614  Jonathan 				continue;
    567  10614  Jonathan 
    568  10614  Jonathan 			start = idx;
    569  10614  Jonathan 		}
    570  10614  Jonathan 	}
    571  10614  Jonathan 	if (start != -1)
    572  10614  Jonathan 		add_range(eip, start, idx);
    573  10614  Jonathan 
    574  10614  Jonathan 	/* See if it will fit in our inline buffers */
    575  10614  Jonathan 	inline_size = range_total_size(eip);
    576  10614  Jonathan 	if (inline_size > ZFM_MAX_INLINE)
    577  10614  Jonathan 		no_inline = 1;
    578  10614  Jonathan 
    579  10614  Jonathan 	/*
    580  10614  Jonathan 	 * If there is no change and we want to drop if the buffers are
    581  10614  Jonathan 	 * identical, do so.
    582  10614  Jonathan 	 */
    583  10614  Jonathan 	if (inline_size == 0 && drop_if_identical) {
    584  10614  Jonathan 		kmem_free(eip, sizeof (*eip));
    585  10614  Jonathan 		return (NULL);
    586  10614  Jonathan 	}
    587  10614  Jonathan 
    588  10614  Jonathan 	/*
    589  10614  Jonathan 	 * Now walk through the ranges, filling in the details of the
    590  10614  Jonathan 	 * differences.  Also convert our uint64_t-array offsets to byte
    591  10614  Jonathan 	 * offsets.
    592  10614  Jonathan 	 */
    593  10614  Jonathan 	for (range = 0; range < eip->zei_range_count; range++) {
    594  10614  Jonathan 		size_t start = eip->zei_ranges[range].zr_start;
    595  10614  Jonathan 		size_t end = eip->zei_ranges[range].zr_end;
    596  10614  Jonathan 
    597  10614  Jonathan 		for (idx = start; idx < end; idx++) {
    598  10614  Jonathan 			uint64_t set, cleared;
    599  10614  Jonathan 
    600  10614  Jonathan 			// bits set in bad, but not in good
    601  10614  Jonathan 			set = ((~good[idx]) & bad[idx]);
    602  10614  Jonathan 			// bits set in good, but not in bad
    603  10614  Jonathan 			cleared = (good[idx] & (~bad[idx]));
    604  10614  Jonathan 
    605  10614  Jonathan 			allset |= set;
    606  10614  Jonathan 			allcleared |= cleared;
    607  10614  Jonathan 
    608  10614  Jonathan 			if (!no_inline) {
    609  10614  Jonathan 				ASSERT3U(offset, <, inline_size);
    610  10614  Jonathan 				eip->zei_bits_set[offset] = set;
    611  10614  Jonathan 				eip->zei_bits_cleared[offset] = cleared;
    612  10614  Jonathan 				offset++;
    613  10614  Jonathan 			}
    614  10614  Jonathan 
    615  10614  Jonathan 			update_histogram(set, eip->zei_histogram_set,
    616  10614  Jonathan 			    &eip->zei_range_sets[range]);
    617  10614  Jonathan 			update_histogram(cleared, eip->zei_histogram_cleared,
    618  10614  Jonathan 			    &eip->zei_range_clears[range]);
    619  10614  Jonathan 		}
    620  10614  Jonathan 
    621  10614  Jonathan 		/* convert to byte offsets */
    622  10614  Jonathan 		eip->zei_ranges[range].zr_start	*= sizeof (uint64_t);
    623  10614  Jonathan 		eip->zei_ranges[range].zr_end	*= sizeof (uint64_t);
    624  10614  Jonathan 	}
    625  10614  Jonathan 	eip->zei_allowed_mingap	*= sizeof (uint64_t);
    626  10614  Jonathan 	inline_size		*= sizeof (uint64_t);
    627  10614  Jonathan 
    628  10614  Jonathan 	/* fill in ereport */
    629  10614  Jonathan 	fm_payload_set(ereport,
    630  10614  Jonathan 	    FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES,
    631  10614  Jonathan 	    DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count,
    632  10614  Jonathan 	    (uint32_t *)eip->zei_ranges,
    633  10614  Jonathan 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP,
    634  10614  Jonathan 	    DATA_TYPE_UINT32, eip->zei_allowed_mingap,
    635  10614  Jonathan 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS,
    636  10614  Jonathan 	    DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets,
    637  10614  Jonathan 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS,
    638  10614  Jonathan 	    DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears,
    639  10614  Jonathan 	    NULL);
    640  10614  Jonathan 
    641  10614  Jonathan 	if (!no_inline) {
    642  10614  Jonathan 		fm_payload_set(ereport,
    643  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS,
    644  10614  Jonathan 		    DATA_TYPE_UINT8_ARRAY,
    645  10614  Jonathan 		    inline_size, (uint8_t *)eip->zei_bits_set,
    646  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS,
    647  10614  Jonathan 		    DATA_TYPE_UINT8_ARRAY,
    648  10614  Jonathan 		    inline_size, (uint8_t *)eip->zei_bits_cleared,
    649  10614  Jonathan 		    NULL);
    650  10614  Jonathan 	} else {
    651  10614  Jonathan 		fm_payload_set(ereport,
    652  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
    653  10614  Jonathan 		    DATA_TYPE_UINT16_ARRAY,
    654  10614  Jonathan 		    NBBY * sizeof (uint64_t), eip->zei_histogram_set,
    655  10614  Jonathan 		    FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
    656  10614  Jonathan 		    DATA_TYPE_UINT16_ARRAY,
    657  10614  Jonathan 		    NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
    658  10614  Jonathan 		    NULL);
    659  10614  Jonathan 	}
    660  10614  Jonathan 	return (eip);
    661  10614  Jonathan }
    662  10614  Jonathan #endif
    663  10614  Jonathan 
    664  10614  Jonathan void
    665  10614  Jonathan zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
    666  10614  Jonathan     uint64_t stateoroffset, uint64_t size)
    667  10614  Jonathan {
    668  10614  Jonathan #ifdef _KERNEL
    669  10614  Jonathan 	nvlist_t *ereport = NULL;
    670  10614  Jonathan 	nvlist_t *detector = NULL;
    671  10614  Jonathan 
    672  10614  Jonathan 	zfs_ereport_start(&ereport, &detector,
    673  10614  Jonathan 	    subclass, spa, vd, zio, stateoroffset, size);
    674  10614  Jonathan 
    675  10614  Jonathan 	if (ereport == NULL)
    676  10614  Jonathan 		return;
    677  10614  Jonathan 
    678   1544  eschrock 	fm_ereport_post(ereport, EVCH_SLEEP);
    679   1544  eschrock 
    680   1544  eschrock 	fm_nvlist_destroy(ereport, FM_NVA_FREE);
    681   1544  eschrock 	fm_nvlist_destroy(detector, FM_NVA_FREE);
    682  10614  Jonathan #endif
    683  10614  Jonathan }
    684  10614  Jonathan 
    685  10614  Jonathan void
    686  10614  Jonathan zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
    687  10614  Jonathan     struct zio *zio, uint64_t offset, uint64_t length, void *arg,
    688  10614  Jonathan     zio_bad_cksum_t *info)
    689  10614  Jonathan {
    690  10614  Jonathan 	zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP);
    691  10614  Jonathan 
    692  10614  Jonathan 	if (zio->io_vsd != NULL)
    693  10614  Jonathan 		zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
    694  10614  Jonathan 	else
    695  10614  Jonathan 		zio_vsd_default_cksum_report(zio, report, arg);
    696  10614  Jonathan 
    697  10614  Jonathan 	/* copy the checksum failure information if it was provided */
    698  10614  Jonathan 	if (info != NULL) {
    699  10614  Jonathan 		report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP);
    700  10614  Jonathan 		bcopy(info, report->zcr_ckinfo, sizeof (*info));
    701  10614  Jonathan 	}
    702  10614  Jonathan 
    703  10922      Jeff 	report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift;
    704  10614  Jonathan 	report->zcr_length = length;
    705  10614  Jonathan 
    706  10614  Jonathan #ifdef _KERNEL
    707  10614  Jonathan 	zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
    708  10614  Jonathan 	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
    709  10614  Jonathan 
    710  10614  Jonathan 	if (report->zcr_ereport == NULL) {
    711  10614  Jonathan 		report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo);
    712  10614  Jonathan 		kmem_free(report, sizeof (*report));
    713  10614  Jonathan 		return;
    714  10614  Jonathan 	}
    715  10614  Jonathan #endif
    716  10614  Jonathan 
    717  10614  Jonathan 	mutex_enter(&spa->spa_errlist_lock);
    718  10614  Jonathan 	report->zcr_next = zio->io_logical->io_cksum_report;
    719  10614  Jonathan 	zio->io_logical->io_cksum_report = report;
    720  10614  Jonathan 	mutex_exit(&spa->spa_errlist_lock);
    721  10614  Jonathan }
    722  10614  Jonathan 
    723  10614  Jonathan void
    724  10614  Jonathan zfs_ereport_finish_checksum(zio_cksum_report_t *report,
    725  10614  Jonathan     const void *good_data, const void *bad_data, boolean_t drop_if_identical)
    726  10614  Jonathan {
    727  10614  Jonathan #ifdef _KERNEL
    728  10614  Jonathan 	zfs_ecksum_info_t *info = NULL;
    729  10614  Jonathan 	info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo,
    730  10614  Jonathan 	    good_data, bad_data, report->zcr_length, drop_if_identical);
    731  10614  Jonathan 
    732  10614  Jonathan 	if (info != NULL)
    733  10614  Jonathan 		fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
    734  10614  Jonathan 
    735  10614  Jonathan 	fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE);
    736  10614  Jonathan 	fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE);
    737  10614  Jonathan 	report->zcr_ereport = report->zcr_detector = NULL;
    738  10614  Jonathan 
    739  10614  Jonathan 	if (info != NULL)
    740  10614  Jonathan 		kmem_free(info, sizeof (*info));
    741  10614  Jonathan #endif
    742  10614  Jonathan }
    743  10614  Jonathan 
    744  10614  Jonathan void
    745  10614  Jonathan zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
    746  10614  Jonathan {
    747  10614  Jonathan #ifdef _KERNEL
    748  10614  Jonathan 	if (rpt->zcr_ereport != NULL) {
    749  10614  Jonathan 		fm_nvlist_destroy(rpt->zcr_ereport,
    750  10614  Jonathan 		    FM_NVA_FREE);
    751  10614  Jonathan 		fm_nvlist_destroy(rpt->zcr_detector,
    752  10614  Jonathan 		    FM_NVA_FREE);
    753  10614  Jonathan 	}
    754  10614  Jonathan #endif
    755  10614  Jonathan 	rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo);
    756  10614  Jonathan 
    757  10614  Jonathan 	if (rpt->zcr_ckinfo != NULL)
    758  10614  Jonathan 		kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo));
    759  10614  Jonathan 
    760  10614  Jonathan 	kmem_free(rpt, sizeof (*rpt));
    761  10614  Jonathan }
    762  10614  Jonathan 
    763  10614  Jonathan void
    764  10614  Jonathan zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
    765  10614  Jonathan {
    766  10614  Jonathan #ifdef _KERNEL
    767  10614  Jonathan 	fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
    768  10614  Jonathan #endif
    769  10614  Jonathan }
    770  10614  Jonathan 
    771  10614  Jonathan void
    772  10614  Jonathan zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
    773  10614  Jonathan     struct zio *zio, uint64_t offset, uint64_t length,
    774  10614  Jonathan     const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc)
    775  10614  Jonathan {
    776  10614  Jonathan #ifdef _KERNEL
    777  10614  Jonathan 	nvlist_t *ereport = NULL;
    778  10614  Jonathan 	nvlist_t *detector = NULL;
    779  10614  Jonathan 	zfs_ecksum_info_t *info;
    780  10614  Jonathan 
    781  10614  Jonathan 	zfs_ereport_start(&ereport, &detector,
    782  10614  Jonathan 	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
    783  10614  Jonathan 
    784  10614  Jonathan 	if (ereport == NULL)
    785  10614  Jonathan 		return;
    786  10614  Jonathan 
    787  10614  Jonathan 	info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
    788  10614  Jonathan 	    B_FALSE);
    789  10614  Jonathan 
    790  10614  Jonathan 	if (info != NULL)
    791  10614  Jonathan 		fm_ereport_post(ereport, EVCH_SLEEP);
    792  10614  Jonathan 
    793  10614  Jonathan 	fm_nvlist_destroy(ereport, FM_NVA_FREE);
    794  10614  Jonathan 	fm_nvlist_destroy(detector, FM_NVA_FREE);
    795  10614  Jonathan 
    796  10614  Jonathan 	if (info != NULL)
    797  10614  Jonathan 		kmem_free(info, sizeof (*info));
    798   1544  eschrock #endif
    799   1544  eschrock }
    800   1544  eschrock 
    801   4451  eschrock static void
    802   4451  eschrock zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
    803   1544  eschrock {
    804   1544  eschrock #ifdef _KERNEL
    805   1544  eschrock 	nvlist_t *resource;
    806   1544  eschrock 	char class[64];
    807   1544  eschrock 
    808  11147    George 	if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
    809  10575      Eric 		return;
    810  10575      Eric 
    811   1544  eschrock 	if ((resource = fm_nvlist_create(NULL)) == NULL)
    812   1544  eschrock 		return;
    813   1544  eschrock 
    814   1544  eschrock 	(void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE,
    815   4451  eschrock 	    ZFS_ERROR_CLASS, name);
    816   1544  eschrock 	VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0);
    817   1544  eschrock 	VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0);
    818   1544  eschrock 	VERIFY(nvlist_add_uint64(resource,
    819   1544  eschrock 	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0);
    820   1544  eschrock 	if (vd)
    821   1544  eschrock 		VERIFY(nvlist_add_uint64(resource,
    822   1544  eschrock 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0);
    823   1544  eschrock 
    824   1544  eschrock 	fm_ereport_post(resource, EVCH_SLEEP);
    825   1544  eschrock 
    826   1544  eschrock 	fm_nvlist_destroy(resource, FM_NVA_FREE);
    827   1544  eschrock #endif
    828   1544  eschrock }
    829   4451  eschrock 
    830   4451  eschrock /*
    831   4451  eschrock  * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev
    832   4451  eschrock  * has been removed from the system.  This will cause the DE to ignore any
    833   4451  eschrock  * recent I/O errors, inferring that they are due to the asynchronous device
    834   4451  eschrock  * removal.
    835   4451  eschrock  */
    836   4451  eschrock void
    837   4451  eschrock zfs_post_remove(spa_t *spa, vdev_t *vd)
    838   4451  eschrock {
    839   4451  eschrock 	zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
    840   4451  eschrock }
    841   4451  eschrock 
    842   4451  eschrock /*
    843   4451  eschrock  * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool
    844   4451  eschrock  * has the 'autoreplace' property set, and therefore any broken vdevs will be
    845   4451  eschrock  * handled by higher level logic, and no vdev fault should be generated.
    846   4451  eschrock  */
    847   4451  eschrock void
    848   4451  eschrock zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
    849   4451  eschrock {
    850   4451  eschrock 	zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
    851   4451  eschrock }
    852  10817      Eric 
    853  10817      Eric /*
    854  10817      Eric  * The 'resource.fs.zfs.statechange' event is an internal signal that the
    855  10817      Eric  * given vdev has transitioned its state to DEGRADED or HEALTHY.  This will
    856  10817      Eric  * cause the retire agent to repair any outstanding fault management cases
    857  10817      Eric  * open because the device was not found (fault.fs.zfs.device).
    858  10817      Eric  */
    859  10817      Eric void
    860  10817      Eric zfs_post_state_change(spa_t *spa, vdev_t *vd)
    861  10817      Eric {
    862  10817      Eric 	zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE);
    863  10817      Eric }
    864