Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   1489   webaker  * Common Development and Distribution License (the "License").
      6   1489   webaker  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21    789    ahrens /*
     22   8876       Lin  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    789    ahrens  * Use is subject to license terms.
     24    789    ahrens  */
     25    789    ahrens 
     26    789    ahrens #include <sys/zfs_context.h>
     27    789    ahrens #include <sys/spa.h>
     28   6423   gw25295 #include <sys/refcount.h>
     29    789    ahrens #include <sys/vdev_disk.h>
     30    789    ahrens #include <sys/vdev_impl.h>
     31    789    ahrens #include <sys/fs/zfs.h>
     32    789    ahrens #include <sys/zio.h>
     33   1171  eschrock #include <sys/sunldi.h>
     34   6976  eschrock #include <sys/fm/fs/zfs.h>
     35    789    ahrens 
     36    789    ahrens /*
     37    789    ahrens  * Virtual device vector for disks.
     38    789    ahrens  */
     39    789    ahrens 
/*
 * LDI identifier, defined outside this file.  It is passed as the ident
 * argument to every ldi_open_by_name(), ldi_open_by_devid() and
 * ldi_open_by_dev() call made here.
 */
extern ldi_ident_t zfs_li;
     41    789    ahrens 
/*
 * Per-I/O wrapper allocated by vdev_disk_io_start() and freed by
 * vdev_disk_io_intr().
 *
 * vdb_buf must remain the first member: vdev_disk_io_intr() is handed the
 * buf_t pointer and casts it straight back to a vdev_disk_buf_t pointer to
 * recover the owning zio.
 */
typedef struct vdev_disk_buf {
	buf_t	vdb_buf;	/* buf handed to ldi_strategy() */
	zio_t	*vdb_io;	/* zio this buf was issued for */
} vdev_disk_buf_t;
     46    789    ahrens 
     47    789    ahrens static int
     48   7754      Jeff vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
     49    789    ahrens {
     50   8241      Jeff 	spa_t *spa = vd->vdev_spa;
     51    789    ahrens 	vdev_disk_t *dvd;
     52   7754      Jeff 	struct dk_minfo dkm;
     53   7754      Jeff 	int error;
     54   5329   gw25295 	dev_t dev;
     55   7754      Jeff 	int otyp;
     56    789    ahrens 
     57    789    ahrens 	/*
     58    789    ahrens 	 * We must have a pathname, and it must be absolute.
     59    789    ahrens 	 */
     60    789    ahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
     61    789    ahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
     62    789    ahrens 		return (EINVAL);
     63    789    ahrens 	}
     64    789    ahrens 
     65  10850    George 	/*
     66  10850    George 	 * Reopen the device if it's not currently open. Otherwise,
     67  10850    George 	 * just update the physical size of the device.
     68  10850    George 	 */
     69  10850    George 	if (vd->vdev_tsd != NULL) {
     70  10850    George 		ASSERT(vd->vdev_reopening);
     71  10850    George 		dvd = vd->vdev_tsd;
     72  10850    George 		goto skip_open;
     73  10850    George 	}
     74  10850    George 
     75    789    ahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
     76    789    ahrens 
     77    789    ahrens 	/*
     78    789    ahrens 	 * When opening a disk device, we want to preserve the user's original
     79    789    ahrens 	 * intent.  We always want to open the device by the path the user gave
     80    789    ahrens 	 * us, even if it is one of multiple paths to the save device.  But we
     81    789    ahrens 	 * also want to be able to survive disks being removed/recabled.
     82    789    ahrens 	 * Therefore the sequence of opening devices is:
     83    789    ahrens 	 *
     84   1171  eschrock 	 * 1. Try opening the device by path.  For legacy pools without the
     85   1171  eschrock 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
     86    789    ahrens 	 *
     87    789    ahrens 	 * 2. If the devid of the device matches the stored value, return
     88    789    ahrens 	 *    success.
     89    789    ahrens 	 *
     90    789    ahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
     91    789    ahrens 	 *    by the devid instead.
     92    789    ahrens 	 */
     93    789    ahrens 	if (vd->vdev_devid != NULL) {
     94    789    ahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
     95    789    ahrens 		    &dvd->vd_minor) != 0) {
     96    789    ahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
     97    789    ahrens 			return (EINVAL);
     98    789    ahrens 		}
     99    789    ahrens 	}
    100    789    ahrens 
    101    789    ahrens 	error = EINVAL;		/* presume failure */
    102    789    ahrens 
    103  10850    George 	if (vd->vdev_path != NULL) {
    104    789    ahrens 		ddi_devid_t devid;
    105    789    ahrens 
    106   1171  eschrock 		if (vd->vdev_wholedisk == -1ULL) {
    107   1171  eschrock 			size_t len = strlen(vd->vdev_path) + 3;
    108   1171  eschrock 			char *buf = kmem_alloc(len, KM_SLEEP);
    109   1171  eschrock 			ldi_handle_t lh;
    110    789    ahrens 
    111   1171  eschrock 			(void) snprintf(buf, len, "%ss0", vd->vdev_path);
    112    789    ahrens 
    113   8241      Jeff 			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
    114   1171  eschrock 			    &lh, zfs_li) == 0) {
    115   1171  eschrock 				spa_strfree(vd->vdev_path);
    116   1171  eschrock 				vd->vdev_path = buf;
    117   1171  eschrock 				vd->vdev_wholedisk = 1ULL;
    118   8241      Jeff 				(void) ldi_close(lh, spa_mode(spa), kcred);
    119   1171  eschrock 			} else {
    120   1171  eschrock 				kmem_free(buf, len);
    121   1171  eschrock 			}
    122   1171  eschrock 		}
    123   1171  eschrock 
    124   8241      Jeff 		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
    125   1171  eschrock 		    &dvd->vd_lh, zfs_li);
    126    789    ahrens 
    127    789    ahrens 		/*
    128    789    ahrens 		 * Compare the devid to the stored value.
    129    789    ahrens 		 */
    130    789    ahrens 		if (error == 0 && vd->vdev_devid != NULL &&
    131    789    ahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
    132    789    ahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
    133    789    ahrens 				error = EINVAL;
    134   8241      Jeff 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
    135   8241      Jeff 				    kcred);
    136    789    ahrens 				dvd->vd_lh = NULL;
    137    789    ahrens 			}
    138    789    ahrens 			ddi_devid_free(devid);
    139    789    ahrens 		}
    140   1171  eschrock 
    141   1171  eschrock 		/*
    142   1171  eschrock 		 * If we succeeded in opening the device, but 'vdev_wholedisk'
    143   1171  eschrock 		 * is not yet set, then this must be a slice.
    144   1171  eschrock 		 */
    145   1171  eschrock 		if (error == 0 && vd->vdev_wholedisk == -1ULL)
    146   1171  eschrock 			vd->vdev_wholedisk = 0;
    147    789    ahrens 	}
    148    789    ahrens 
    149    789    ahrens 	/*
    150    789    ahrens 	 * If we were unable to open by path, or the devid check fails, open by
    151    789    ahrens 	 * devid instead.
    152    789    ahrens 	 */
    153    789    ahrens 	if (error != 0 && vd->vdev_devid != NULL)
    154    789    ahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
    155   8241      Jeff 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
    156    789    ahrens 
    157   4451  eschrock 	/*
    158   4451  eschrock 	 * If all else fails, then try opening by physical path (if available)
    159   4451  eschrock 	 * or the logical path (if we failed due to the devid check).  While not
    160   4451  eschrock 	 * as reliable as the devid, this will give us something, and the higher
    161   4451  eschrock 	 * level vdev validation will prevent us from opening the wrong device.
    162   4451  eschrock 	 */
    163   4451  eschrock 	if (error) {
    164   4451  eschrock 		if (vd->vdev_physpath != NULL &&
    165   8269      Mark 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
    166   8241      Jeff 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
    167   4451  eschrock 			    kcred, &dvd->vd_lh, zfs_li);
    168   4451  eschrock 
    169   4451  eschrock 		/*
    170   4451  eschrock 		 * Note that we don't support the legacy auto-wholedisk support
    171   4451  eschrock 		 * as above.  This hasn't been used in a very long time and we
    172   4451  eschrock 		 * don't need to propagate its oddities to this edge condition.
    173   4451  eschrock 		 */
    174  10850    George 		if (error && vd->vdev_path != NULL)
    175   8241      Jeff 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
    176   8241      Jeff 			    kcred, &dvd->vd_lh, zfs_li);
    177   4451  eschrock 	}
    178   4451  eschrock 
    179   7754      Jeff 	if (error) {
    180    789    ahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
    181   7754      Jeff 		return (error);
    182   7754      Jeff 	}
    183   5329   gw25295 
    184   4451  eschrock 	/*
    185   4451  eschrock 	 * Once a device is opened, verify that the physical device path (if
    186   4451  eschrock 	 * available) is up to date.
    187   4451  eschrock 	 */
    188   4451  eschrock 	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
    189   4451  eschrock 	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
    190   5329   gw25295 		char *physpath, *minorname;
    191   5329   gw25295 
    192   4451  eschrock 		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    193   4451  eschrock 		minorname = NULL;
    194   4451  eschrock 		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
    195   4451  eschrock 		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
    196   4451  eschrock 		    (vd->vdev_physpath == NULL ||
    197   4451  eschrock 		    strcmp(vd->vdev_physpath, physpath) != 0)) {
    198   4451  eschrock 			if (vd->vdev_physpath)
    199   4451  eschrock 				spa_strfree(vd->vdev_physpath);
    200   4451  eschrock 			(void) strlcat(physpath, ":", MAXPATHLEN);
    201   4451  eschrock 			(void) strlcat(physpath, minorname, MAXPATHLEN);
    202   4451  eschrock 			vd->vdev_physpath = spa_strdup(physpath);
    203   4451  eschrock 		}
    204   4451  eschrock 		if (minorname)
    205   4451  eschrock 			kmem_free(minorname, strlen(minorname) + 1);
    206   4451  eschrock 		kmem_free(physpath, MAXPATHLEN);
    207    789    ahrens 	}
    208    789    ahrens 
    209  10850    George skip_open:
    210    789    ahrens 	/*
    211    789    ahrens 	 * Determine the actual size of the device.
    212    789    ahrens 	 */
    213    789    ahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
    214    789    ahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
    215    789    ahrens 		return (EINVAL);
    216    789    ahrens 	}
    217    789    ahrens 
    218   1732   bonwick 	/*
    219   1732   bonwick 	 * If we own the whole disk, try to enable disk write caching.
    220   1732   bonwick 	 * We ignore errors because it's OK if we can't do it.
    221   1732   bonwick 	 */
    222   1732   bonwick 	if (vd->vdev_wholedisk == 1) {
    223   1732   bonwick 		int wce = 1;
    224   1732   bonwick 		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
    225   1732   bonwick 		    FKIOCTL, kcred, NULL);
    226   1732   bonwick 	}
    227   1489   webaker 
    228   1732   bonwick 	/*
    229   1732   bonwick 	 * Determine the device's minimum transfer size.
    230   1732   bonwick 	 * If the ioctl isn't supported, assume DEV_BSIZE.
    231   1732   bonwick 	 */
    232   1732   bonwick 	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
    233   1732   bonwick 	    FKIOCTL, kcred, NULL) != 0)
    234   1732   bonwick 		dkm.dki_lbsize = DEV_BSIZE;
    235   1489   webaker 
    236   1732   bonwick 	*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
    237    789    ahrens 
    238   1773  eschrock 	/*
    239   1773  eschrock 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
    240   1773  eschrock 	 * try again.
    241   1773  eschrock 	 */
    242   1773  eschrock 	vd->vdev_nowritecache = B_FALSE;
    243   1773  eschrock 
    244    789    ahrens 	return (0);
    245    789    ahrens }
    246    789    ahrens 
    247    789    ahrens static void
    248    789    ahrens vdev_disk_close(vdev_t *vd)
    249    789    ahrens {
    250    789    ahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
    251    789    ahrens 
    252  10850    George 	if (vd->vdev_reopening || dvd == NULL)
    253    789    ahrens 		return;
    254    789    ahrens 
    255    789    ahrens 	if (dvd->vd_minor != NULL)
    256    789    ahrens 		ddi_devid_str_free(dvd->vd_minor);
    257    789    ahrens 
    258    789    ahrens 	if (dvd->vd_devid != NULL)
    259    789    ahrens 		ddi_devid_free(dvd->vd_devid);
    260    789    ahrens 
    261    789    ahrens 	if (dvd->vd_lh != NULL)
    262   8241      Jeff 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
    263    789    ahrens 
    264    789    ahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
    265    789    ahrens 	vd->vdev_tsd = NULL;
    266    789    ahrens }
    267    789    ahrens 
    268   6423   gw25295 int
    269   6423   gw25295 vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size,
    270   6423   gw25295     uint64_t offset, int flags)
    271   6423   gw25295 {
    272   6423   gw25295 	buf_t *bp;
    273   6423   gw25295 	int error = 0;
    274   6423   gw25295 
    275   6423   gw25295 	if (vd_lh == NULL)
    276   6423   gw25295 		return (EINVAL);
    277   6423   gw25295 
    278   6423   gw25295 	ASSERT(flags & B_READ || flags & B_WRITE);
    279   6423   gw25295 
    280   6423   gw25295 	bp = getrbuf(KM_SLEEP);
    281   6423   gw25295 	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
    282   6423   gw25295 	bp->b_bcount = size;
    283   6423   gw25295 	bp->b_un.b_addr = (void *)data;
    284   6423   gw25295 	bp->b_lblkno = lbtodb(offset);
    285   6423   gw25295 	bp->b_bufsize = size;
    286   6423   gw25295 
    287   6423   gw25295 	error = ldi_strategy(vd_lh, bp);
    288   6423   gw25295 	ASSERT(error == 0);
    289   6423   gw25295 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
    290   6423   gw25295 		error = EIO;
    291   6423   gw25295 	freerbuf(bp);
    292   6423   gw25295 
    293   6423   gw25295 	return (error);
    294   6423   gw25295 }
    295   6423   gw25295 
    296    789    ahrens static void
    297    789    ahrens vdev_disk_io_intr(buf_t *bp)
    298    789    ahrens {
    299    789    ahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
    300    789    ahrens 	zio_t *zio = vdb->vdb_io;
    301    789    ahrens 
    302   6976  eschrock 	/*
    303   6976  eschrock 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
    304   6976  eschrock 	 * Rather than teach the rest of the stack about other error
    305   6976  eschrock 	 * possibilities (EFAULT, etc), we normalize the error value here.
    306   6976  eschrock 	 */
    307   6976  eschrock 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
    308   6976  eschrock 
    309   6976  eschrock 	if (zio->io_error == 0 && bp->b_resid != 0)
    310    789    ahrens 		zio->io_error = EIO;
    311    789    ahrens 
    312    789    ahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
    313    789    ahrens 
    314   5530   bonwick 	zio_interrupt(zio);
    315    789    ahrens }
    316    789    ahrens 
    317    789    ahrens static void
    318   7762      Jeff vdev_disk_ioctl_free(zio_t *zio)
    319   7762      Jeff {
    320   7762      Jeff 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
    321   7762      Jeff }
    322   7762      Jeff 
/*
 * vsd operations installed on cache-flush ioctl zios by
 * vdev_disk_io_start().  The initializer is positional; the slot order is
 * defined by zio_vsd_ops_t, which is declared outside this file.
 */
static const zio_vsd_ops_t vdev_disk_vsd_ops = {
	vdev_disk_ioctl_free,		/* frees the dk_callback in io_vsd */
	zio_vsd_default_cksum_report
};
    327  10614  Jonathan 
    328   7762      Jeff static void
    329    789    ahrens vdev_disk_ioctl_done(void *zio_arg, int error)
    330    789    ahrens {
    331    789    ahrens 	zio_t *zio = zio_arg;
    332    789    ahrens 
    333    789    ahrens 	zio->io_error = error;
    334    789    ahrens 
    335   5530   bonwick 	zio_interrupt(zio);
    336    789    ahrens }
    337    789    ahrens 
    338   5530   bonwick static int
    339    789    ahrens vdev_disk_io_start(zio_t *zio)
    340    789    ahrens {
    341    789    ahrens 	vdev_t *vd = zio->io_vd;
    342    789    ahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
    343    789    ahrens 	vdev_disk_buf_t *vdb;
    344   7754      Jeff 	struct dk_callback *dkc;
    345    789    ahrens 	buf_t *bp;
    346   7754      Jeff 	int error;
    347    789    ahrens 
    348    789    ahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
    349    789    ahrens 		/* XXPOLICY */
    350   5329   gw25295 		if (!vdev_readable(vd)) {
    351    789    ahrens 			zio->io_error = ENXIO;
    352   5530   bonwick 			return (ZIO_PIPELINE_CONTINUE);
    353    789    ahrens 		}
    354    789    ahrens 
    355    789    ahrens 		switch (zio->io_cmd) {
    356    789    ahrens 
    357    789    ahrens 		case DKIOCFLUSHWRITECACHE:
    358    789    ahrens 
    359   2885    ahrens 			if (zfs_nocacheflush)
    360   2885    ahrens 				break;
    361   2885    ahrens 
    362   1773  eschrock 			if (vd->vdev_nowritecache) {
    363   1773  eschrock 				zio->io_error = ENOTSUP;
    364   1773  eschrock 				break;
    365   1773  eschrock 			}
    366   1773  eschrock 
    367   7754      Jeff 			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
    368  10614  Jonathan 			zio->io_vsd_ops = &vdev_disk_vsd_ops;
    369   7754      Jeff 
    370   7754      Jeff 			dkc->dkc_callback = vdev_disk_ioctl_done;
    371   7754      Jeff 			dkc->dkc_flag = FLUSH_VOLATILE;
    372   7754      Jeff 			dkc->dkc_cookie = zio;
    373    789    ahrens 
    374    789    ahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
    375   7754      Jeff 			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);
    376    789    ahrens 
    377    789    ahrens 			if (error == 0) {
    378    789    ahrens 				/*
    379    789    ahrens 				 * The ioctl will be done asychronously,
    380    789    ahrens 				 * and will call vdev_disk_ioctl_done()
    381    789    ahrens 				 * upon completion.
    382    789    ahrens 				 */
    383   5530   bonwick 				return (ZIO_PIPELINE_STOP);
    384   5530   bonwick 			}
    385   5530   bonwick 
    386   5530   bonwick 			if (error == ENOTSUP || error == ENOTTY) {
    387   1773  eschrock 				/*
    388   4455    mishra 				 * If we get ENOTSUP or ENOTTY, we know that
    389   4455    mishra 				 * no future attempts will ever succeed.
    390   4455    mishra 				 * In this case we set a persistent bit so
    391   4455    mishra 				 * that we don't bother with the ioctl in the
    392   4455    mishra 				 * future.
    393   1773  eschrock 				 */
    394   1773  eschrock 				vd->vdev_nowritecache = B_TRUE;
    395    789    ahrens 			}
    396    789    ahrens 			zio->io_error = error;
    397   1773  eschrock 
    398    789    ahrens 			break;
    399    789    ahrens 
    400    789    ahrens 		default:
    401    789    ahrens 			zio->io_error = ENOTSUP;
    402    789    ahrens 		}
    403    789    ahrens 
    404   5530   bonwick 		return (ZIO_PIPELINE_CONTINUE);
    405    789    ahrens 	}
    406    789    ahrens 
    407    789    ahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
    408    789    ahrens 
    409    789    ahrens 	vdb->vdb_io = zio;
    410    789    ahrens 	bp = &vdb->vdb_buf;
    411    789    ahrens 
    412    789    ahrens 	bioinit(bp);
    413   7754      Jeff 	bp->b_flags = B_BUSY | B_NOCACHE |
    414   9725      Eric 	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
    415   9725      Eric 	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
    416   9725      Eric 		bp->b_flags |= B_FAILFAST;
    417    789    ahrens 	bp->b_bcount = zio->io_size;
    418    789    ahrens 	bp->b_un.b_addr = zio->io_data;
    419    789    ahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
    420    789    ahrens 	bp->b_bufsize = zio->io_size;
    421    789    ahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
    422    789    ahrens 
    423    789    ahrens 	/* ldi_strategy() will return non-zero only on programming errors */
    424   7754      Jeff 	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
    425   5530   bonwick 
    426   5530   bonwick 	return (ZIO_PIPELINE_STOP);
    427    789    ahrens }
    428    789    ahrens 
    429   7754      Jeff static void
    430    789    ahrens vdev_disk_io_done(zio_t *zio)
    431    789    ahrens {
    432   7754      Jeff 	vdev_t *vd = zio->io_vd;
    433   4451  eschrock 
    434   4451  eschrock 	/*
    435   4451  eschrock 	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
    436   4451  eschrock 	 * the device has been removed.  If this is the case, then we trigger an
    437   5329   gw25295 	 * asynchronous removal of the device. Otherwise, probe the device and
    438   5369   gw25295 	 * make sure it's still accessible.
    439   4451  eschrock 	 */
    440  10575      Eric 	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
    441   5329   gw25295 		vdev_disk_t *dvd = vd->vdev_tsd;
    442   7754      Jeff 		int state = DKIO_NONE;
    443   5329   gw25295 
    444   7754      Jeff 		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
    445   7754      Jeff 		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
    446  10575      Eric 			/*
    447  10575      Eric 			 * We post the resource as soon as possible, instead of
    448  10575      Eric 			 * when the async removal actually happens, because the
    449  10575      Eric 			 * DE is using this information to discard previous I/O
    450  10575      Eric 			 * errors.
    451  10575      Eric 			 */
    452  10575      Eric 			zfs_post_remove(zio->io_spa, vd);
    453   4451  eschrock 			vd->vdev_remove_wanted = B_TRUE;
    454   4451  eschrock 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
    455   4451  eschrock 		}
    456   4451  eschrock 	}
    457    789    ahrens }
    458    789    ahrens 
/*
 * Operations vector for disk vdevs.  The initializer is positional; the
 * slot order is defined by vdev_ops_t, which is declared outside this file.
 * NOTE(review): confirm against vdev_ops_t which callback the NULL slot
 * corresponds to.
 */
vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	NULL,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
    469   6423   gw25295 
    470   6423   gw25295 /*
    471   7147    taylor  * Given the root disk device devid or pathname, read the label from
    472   7147    taylor  * the device, and construct a configuration nvlist.
    473   6423   gw25295  */
    474   7539       Lin int
    475   7539       Lin vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
    476   6423   gw25295 {
    477   6423   gw25295 	ldi_handle_t vd_lh;
    478   6423   gw25295 	vdev_label_t *label;
    479   6423   gw25295 	uint64_t s, size;
    480   6423   gw25295 	int l;
    481   7147    taylor 	ddi_devid_t tmpdevid;
    482   7687       Lin 	int error = -1;
    483   7147    taylor 	char *minor_name;
    484   6423   gw25295 
    485   6423   gw25295 	/*
    486   6423   gw25295 	 * Read the device label and build the nvlist.
    487   6423   gw25295 	 */
    488   7687       Lin 	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
    489   7147    taylor 	    &minor_name) == 0) {
    490   7147    taylor 		error = ldi_open_by_devid(tmpdevid, minor_name,
    491   8241      Jeff 		    FREAD, kcred, &vd_lh, zfs_li);
    492   7147    taylor 		ddi_devid_free(tmpdevid);
    493   7147    taylor 		ddi_devid_str_free(minor_name);
    494   7147    taylor 	}
    495   7147    taylor 
    496   7687       Lin 	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
    497   7687       Lin 	    zfs_li)))
    498   7539       Lin 		return (error);
    499   6423   gw25295 
    500   6673  eschrock 	if (ldi_get_size(vd_lh, &s)) {
    501   6673  eschrock 		(void) ldi_close(vd_lh, FREAD, kcred);
    502   7539       Lin 		return (EIO);
    503   6673  eschrock 	}
    504   6423   gw25295 
    505   6423   gw25295 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
    506   6423   gw25295 	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
    507   6423   gw25295 
    508   9616      Eric 	*config = NULL;
    509   6423   gw25295 	for (l = 0; l < VDEV_LABELS; l++) {
    510   6423   gw25295 		uint64_t offset, state, txg = 0;
    511   6423   gw25295 
    512   6423   gw25295 		/* read vdev label */
    513   6423   gw25295 		offset = vdev_label_offset(size, l, 0);
    514   6423   gw25295 		if (vdev_disk_physio(vd_lh, (caddr_t)label,
    515   8876       Lin 		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
    516   6423   gw25295 			continue;
    517   6423   gw25295 
    518   6423   gw25295 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
    519   7539       Lin 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
    520   7539       Lin 			*config = NULL;
    521   6423   gw25295 			continue;
    522   6423   gw25295 		}
    523   6423   gw25295 
    524   7539       Lin 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
    525   6423   gw25295 		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
    526   7539       Lin 			nvlist_free(*config);
    527   7539       Lin 			*config = NULL;
    528   6423   gw25295 			continue;
    529   6423   gw25295 		}
    530   6423   gw25295 
    531   7539       Lin 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
    532   6423   gw25295 		    &txg) != 0 || txg == 0) {
    533   7539       Lin 			nvlist_free(*config);
    534   7539       Lin 			*config = NULL;
    535   6423   gw25295 			continue;
    536   6423   gw25295 		}
    537   6423   gw25295 
    538   6423   gw25295 		break;
    539   6423   gw25295 	}
    540   6423   gw25295 
    541   6423   gw25295 	kmem_free(label, sizeof (vdev_label_t));
    542   6673  eschrock 	(void) ldi_close(vd_lh, FREAD, kcred);
    543   9616      Eric 	if (*config == NULL)
    544   9616      Eric 		error = EIDRM;
    545   6673  eschrock 
    546   7539       Lin 	return (error);
    547   6423   gw25295 }
    548